/* Do-file to generate the tables and figures for "Gender Differences in Job Search and the Earnings Gap: Evidence from the Field and the Lab"
(QJE, 2023) by Cortes, Pan, Pilossoph, Reuben, and Zafar 
Last updated: Jan 2023 */

* ---------------------------------------------------------------------------
* Session setup
* ---------------------------------------------------------------------------
clear all

* Generate new variables as doubles for the rest of this Stata session.
* The `permanently' option is deliberately not used: it would rewrite the
* user's saved Stata preferences, a side effect that outlives this do-file.
set type double

* Working directory holding the data files read by every section below.
* NOTE(review): machine-specific absolute path; edit before running elsewhere.
cd "/Users/jessicapan/JPan Dropbox/Jessica Pan/current_projects/BU Job Search Analysis/Submission/qje/replication_materials/data files"

* Output folder for exported figures (used as "${figures}<name>.tif"; the
* trailing slash is required by that usage).
global figures "/Users/jessicapan/JPan Dropbox/Jessica Pan/current_projects/BU Job Search Analysis/Submission/qje/replication_materials/figures/"

use BU_grad_analysis_sample_aug2021, clear

************************************************************
* FIGURE 1: CDF of Job Acceptance Timing, By Gender 
************************************************************

	* Start from a fresh copy of the analysis sample.
	use BU_grad_analysis_sample_aug2021.dta, clear 

	* Censor the acceptance month to the window [-9, 9] (system-missing untouched).
	replace accept_mo = 9 if accept_mo>9 & accept_mo!=.
	replace accept_mo = -9 if accept_mo<-9 & accept_mo!=.

	* Kolmogorov-Smirnov test of acceptance month by gender; the p-value is
	* printed in the figure note.
	ksmirnov accept_mo if accepted==1, by(gender)
	local ks_p: display %4.3f r(p)

	* One row per (month, gender): mean earnings and number of non-missing
	* earnings observations among those who accepted.
	collapse (mean) mean=first_total_nt (count) num=first_total_nt if accepted==1, by(accept_mo gender)

	* Running totals within gender, ordered by month.
	gen raw_sum = mean*num
	bysort gender (accept_mo): gen cum_num = sum(num)
	bysort gender (accept_mo): gen cum_sum = sum(raw_sum)
	gen cum_mean = cum_sum/cum_num

	* One row per month, one set of columns per gender code (1 and 2).
	reshape wide mean num raw_sum cum_num cum_sum cum_mean, i(accept_mo) j(gender)

	* Cumulative share = running count / final count, for each gender.
	foreach g in 1 2 {
		egen total_num`g' = max(cum_num`g')
		gen prop`g' = cum_num`g'/total_num`g'
	}

	* Plot 1 is gender code 2 (labelled "Male"), plot 2 is gender code 1 ("Female").
	twoway (line prop2 accept_mo, lwidth(medthick)) ///
		(line prop1 accept_mo, lp(dash) lwidth(medthick)), ///
		graphregion(color(white)) ///
		xtitle("Months Since Graduation", size(medsmall)) ///
		ytitle("Proportion Accepted a Job" " ", size(medsmall)) ///
		legend(order(1 "Male" 2 "Female") rows(1) size(medsmall)) ///
		note("KS p-val: `ks_p'", ring(0) pos(4) size(medsmall)) scheme(s2mono)

	graph export "${figures}figure1.tif", width(3000) replace

*******************************************************************************************
* Figure 2: Cumulative Mean Accepted Earnings and Gender Gap by Months Since Graduation
*******************************************************************************************

* Reload the analysis sample and keep accepted offers within 15 months of
* graduation (rows with missing accept_mo are dropped by the second keep).
use BU_grad_analysis_sample_aug2021.dta, clear //reset dataset
keep if accepted==1
keep if abs(accept_mo)<=15
	
gen male = 1-female

* Missing industry gets its own category (code 13) so these rows stay in
* regressions that include i.first_industry.
replace first_industry=13 if accepted==1 & missing(first_industry)

* Indicator: job offers maternity OR paternity leave. Defined only when BOTH
* components are observed (the original condition tested maternity twice, so
* rows with a missing paternity value were silently coded from maternity alone).
gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)
	
* Missing-indicator method: flag missing values, then zero-fill them so the
* observation is retained when the variable is used as a control.
foreach var of varlist exp_earn_growth_1yr{
	gen `var'_miss=missing(`var')
	replace `var'=0 if missing(`var')
	}

** PANEL A **	

* fig2_levels
*   For each month m in [-9, 9] and each gender, stores the mean (and its
*   standard error) of first_total_nt among accepted offers with
*   accept_mo <= m. Results are written into rows 7..25 of the data in memory
*   (row = m + 16), alongside a helper variable mo that holds the month.
*   Returns: r(slope_g0), r(slope_g1) = slope of the cumulative mean on mo
*   for female==0 and female==1 respectively.
cap program drop fig2_levels
program fig2_levels, rclass
	* (Re)create the output columns as all-missing.
	foreach v in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se {
		cap drop `v'
	}
	foreach v in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se {
		gen `v' = .
	}

	* Month index stored in the first 32 rows: row 1 -> -15, ..., row 32 -> 16.
	cap drop mo
	gen mo = _n-16 if _n<=32

	forvalues m = -9/9 {
		local row = `m' + 16
		forvalues g = 0/1 {
			* A constant-only regression yields the mean and its standard error.
			reg first_total_nt if accepted==1 & accept_mo<=`m' & female==`g'
			replace cum_mean`g' = _b[_cons] in `row'
			replace cum_mean`g'_se = _se[_cons] in `row'
		}
	}

	* Linear trend of each cumulative-mean series over months.
	reg cum_mean0 mo
	return scalar slope_g0 = _b[mo]
	reg cum_mean1 mo
	return scalar slope_g1 = _b[mo]
end

** PLOTTING THE FIGURE **

set more off 

// Fill cum_mean0/cum_mean1 (+ _se) and mo in the data in memory.
fig2_levels

// Slopes of the cumulative means on month, formatted for the figure note.
reg cum_mean0 mo
local slope_g0: display %4.1f _b[mo]
reg cum_mean1 mo
local slope_g1: display %4.1f _b[mo]

// Drop any earlier bounds one pattern at a time. A single
// "cap drop ub_* lb_* log_ub_* log_lb_*" is all-or-nothing: if any pattern
// matches no variable, the whole drop fails (silently, under capture) and the
// gen statements below then stop with "already defined" on a re-run.
foreach pat in ub_* lb_* log_ub_* log_lb_* {
	cap drop `pat'
}

// 95% confidence bands (normal approximation) around each cumulative mean.
forvalues g = 0/1 {
	gen ub_g`g' = cum_mean`g' + 1.96*cum_mean`g'_se
	gen lb_g`g' = cum_mean`g' - 1.96*cum_mean`g'_se
}

// NOTE(review): the "***" markers in the note are hard-coded, not derived
// from the regressions above; confirm they still hold if the sample changes.
twoway (rarea ub_g0 lb_g0 mo if mo>=-9 & mo<=9, lwidth(none) fcolor(gs2%20)) ///
	(rarea ub_g1 lb_g1 mo if mo>=-9 & mo<=9, lwidth(none) fcolor(gs2%20)) ///
	(line cum_mean0 mo if mo>=-9 & mo<=9, lcolor(gs2)) ///
	(line cum_mean1 mo if mo>=-9 & mo<=9, lcolor(gs2) lp(dash)), ///
	ylabel(50000(10000)90000) xlabel(-9(3)9) ///
	graphregion(color(white)) xtitle("Months Since Graduation", size(medsmall)) ///
	ytitle("Cumulative Mean Accepted Offer ($)" " ", size(medsmall)) ///
	legend(order(3 "Male" 4 "Female") size(medsmall)) name(figure2, replace) ///
	note("Male Slope: `slope_g0'***" "Female Slope: `slope_g1'***", ring(0) pos(2) size(small)) 

graph export "${figures}figure2a.tif", width(1000) replace

** PANEL B **

* Nested control sets, stored as globals:
*   controls1 = basic controls
*   controls2 = controls1 + industry fixed effects
*   controls3 = controls2 + job benefits and expected earnings growth
global controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
global controls2 "${controls1} i.first_industry"
global controls3 "${controls2} i.first_benefit_flexwork i.first_benefit_maternity i.first_benefit_paternity i.first_benefit_sickleave i.first_benefit_childcare exp_earn_growth_1yr exp_earn_growth_1yr_miss"

* Remove residuals left over from an earlier run.
forvalues s = 1/3 {
	cap drop *_res`s'
}

set more off
* Residualize level and log earnings on each control set:
* <outcome>_res<s> is the residual from regressing <outcome> on controls<s>.
forvalues s = 1/3 {
	foreach y in first_total_nt log_first_total_nt {
		reg `y' ${controls`s'}
		predict `y'_res`s', res
	}
}

set more off 
* gap_raw
*   For each month m in [-9, 9], regresses earnings (levels, then logs) on
*   male among accepted offers with accept_mo <= m and stores the male
*   coefficient and its standard error in row m + 16 of cum_gap / log_cum_gap
*   (and the matching _se variables). mo holds the month for rows 1..32.
*   Returns: r(slope_mo), r(log_slope_mo) = slope of each gap series on mo.
cap program drop gap_raw
program gap_raw, rclass
	cap drop cum_gap cum_gap_se
	cap drop log_cum_gap log_cum_gap_se
	foreach v in cum_gap cum_gap_se log_cum_gap log_cum_gap_se {
		gen `v' = .
	}
	cap drop mo
	gen mo = _n-16 if _n<=32

	* Levels first, then logs; the output name is the outcome name with
	* "first_total_nt" swapped for "cum_gap".
	foreach y in first_total_nt log_first_total_nt {
		local out = subinstr("`y'", "first_total_nt", "cum_gap", 1)
		forvalues m = -9/9 {
			local row = `m' + 16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `out' = _b[male] in `row'
			replace `out'_se = _se[male] in `row'
		}
	}

	reg cum_gap mo
	return scalar slope_mo = _b[mo]

	reg log_cum_gap mo
	return scalar log_slope_mo = _b[mo]
end

set more off 
* gap_res1
*   Same construction as the raw cumulative gap, but applied to the
*   residualized outcomes first_total_nt_res1 / log_first_total_nt_res1.
*   Fills cum_gap_res1, log_cum_gap_res1 (+ _se) in rows 7..25 and mo in
*   rows 1..32.
*   Returns: r(slope_mo_res1), r(log_slope_mo_res1).
cap program drop gap_res1
program gap_res1, rclass
	cap drop cum_gap_res1 cum_gap_res1_se
	cap drop log_cum_gap_res1 log_cum_gap_res1_se
	foreach v in cum_gap_res1 cum_gap_res1_se log_cum_gap_res1 log_cum_gap_res1_se {
		gen `v' = .
	}
	cap drop mo
	gen mo = _n-16 if _n<=32

	* Levels first, then logs; output name = outcome name with
	* "first_total_nt" swapped for "cum_gap".
	foreach y in first_total_nt_res1 log_first_total_nt_res1 {
		local out = subinstr("`y'", "first_total_nt", "cum_gap", 1)
		forvalues m = -9/9 {
			local row = `m' + 16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `out' = _b[male] in `row'
			replace `out'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res1 mo
	return scalar slope_mo_res1 = _b[mo]

	reg log_cum_gap_res1 mo
	return scalar log_slope_mo_res1 = _b[mo]
end

* gap_res2
*   Cumulative male coefficient by month for the outcomes residualized on
*   the second control set (first_total_nt_res2 / log_first_total_nt_res2).
*   Fills cum_gap_res2, log_cum_gap_res2 (+ _se) in rows 7..25 and mo in
*   rows 1..32.
*   Returns: r(slope_mo_res2), r(log_slope_mo_res2).
cap program drop gap_res2
program gap_res2, rclass
	cap drop cum_gap_res2 cum_gap_res2_se
	cap drop log_cum_gap_res2 log_cum_gap_res2_se
	foreach v in cum_gap_res2 cum_gap_res2_se log_cum_gap_res2 log_cum_gap_res2_se {
		gen `v' = .
	}
	cap drop mo
	gen mo = _n-16 if _n<=32

	* Levels first, then logs; output name = outcome name with
	* "first_total_nt" swapped for "cum_gap".
	foreach y in first_total_nt_res2 log_first_total_nt_res2 {
		local out = subinstr("`y'", "first_total_nt", "cum_gap", 1)
		forvalues m = -9/9 {
			local row = `m' + 16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `out' = _b[male] in `row'
			replace `out'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res2 mo
	return scalar slope_mo_res2 = _b[mo]

	reg log_cum_gap_res2 mo
	return scalar log_slope_mo_res2 = _b[mo]
end

* gap_res3
*   Cumulative male coefficient by month for the outcomes residualized on
*   the third control set (first_total_nt_res3 / log_first_total_nt_res3).
*   Fills cum_gap_res3, log_cum_gap_res3 (+ _se) in rows 7..25 and mo in
*   rows 1..32.
*   Returns: r(slope_mo_res3), r(log_slope_mo_res3).
cap program drop gap_res3
program gap_res3, rclass
	cap drop cum_gap_res3 cum_gap_res3_se
	cap drop log_cum_gap_res3 log_cum_gap_res3_se
	foreach v in cum_gap_res3 cum_gap_res3_se log_cum_gap_res3 log_cum_gap_res3_se {
		gen `v' = .
	}
	cap drop mo
	gen mo = _n-16 if _n<=32

	* Levels first, then logs; output name = outcome name with
	* "first_total_nt" swapped for "cum_gap".
	foreach y in first_total_nt_res3 log_first_total_nt_res3 {
		local out = subinstr("`y'", "first_total_nt", "cum_gap", 1)
		forvalues m = -9/9 {
			local row = `m' + 16
			reg `y' male if accepted==1 & accept_mo<=`m'
			replace `out' = _b[male] in `row'
			replace `out'_se = _se[male] in `row'
		}
	}

	reg cum_gap_res3 mo
	return scalar slope_mo_res3 = _b[mo]

	reg log_cum_gap_res3 mo
	return scalar log_slope_mo_res3 = _b[mo]
end

** PLOTTING THE FIGURE **

// Fill the cumulative-gap series (raw and residualized) in the data in memory.

set more off
gap_raw
gap_res1
gap_res2
gap_res3

// Slopes of each gap series on month, formatted for the figure note.
// (slope_res3 is computed but not shown in the figure below.)
reg cum_gap mo
local slope: display %4.1f _b[mo]
reg cum_gap_res1 mo
local slope_res1: display %4.1f _b[mo]
reg cum_gap_res2 mo
local slope_res2: display %4.1f _b[mo]
reg cum_gap_res3 mo
local slope_res3: display %4.1f _b[mo]

// NOTE(review): the "**" markers in the note are hard-coded, not derived
// from the regressions above.
# delimit ;
graph tw line cum_gap mo if mo>=-9 & mo<=9, msize(small) lpattern(solid) || 
line cum_gap_res1 mo if mo>=-9 & mo<=9, msize(small) lpattern(longdash) ||
line cum_gap_res2 mo if mo>=-9 & mo<=9, msize(small) lpattern(dash_dot) 
ylabel(0(5000)15000, labsize(small)) xlabel(-9(3)9, labsize(small))
xtitle("Months Since Graduation", size(small)) ytitle("Cumulative Gender Gap in Residualized Earnings ($)" "", size(small))
legend(label(1 "No Controls") label(2 "Basic Controls") label(3 "Basic Controls + Industry FE") size(small))
note("Slope (no controls): `slope'**" "Slope (w/ basic controls): `slope_res1'**" 
"Slope (w/ basic controls + industry FE): `slope_res2'**", ring(0) pos(2) size(small))
graphregion(color(white)) scheme(s2mono);

// Restore the default end-of-line delimiter. Without this, the `*' comment
// banner that follows would run until the next semicolon in the file and
// swallow the code after it.
# delimit cr
graph export "${figures}figure2b.tif", width(1000) replace

****************************************************************************
* Figure 4: Gender Difference in Beliefs Bias – Cross-Cohort Comparison
****************************************************************************

	* One row per respondent.
	use BU_grad_analysis_sample_aug2021.dta, clear 
	keep if indiv_tag==1

	* Belief variable selected by gender code: quest_belief_male when
	* gender==2, quest_belief_female when gender==1.
	gen quest_belief_self = cond(gender==2, quest_belief_male, quest_belief_female) if inlist(gender, 1, 2)

	* Stub 0 = expected earnings (2018 and 2019 cohorts);
	* stub 1 = realized earnings (2017 and 2018 cohorts).
	gen expect_real_earn0 = expected_totalpay if inlist(cohort, 2018, 2019)
	gen expect_real_earn1 = first_total_nt if inlist(cohort, 2017, 2018)
	gen expect_quest0 = quest_belief_self if inlist(cohort, 2018, 2019) & !missing(expected_totalpay)

	keep expect_real_earn* expect_quest* RespondentID gender cohort

	* Stack expectations (real==0) and realizations (real==1).
	reshape long expect_real_earn expect_quest, i(RespondentID) j(real)
	keep if !missing(expect_real_earn)

	** Without pop beliefs

	* One panel per gender code, female (1) first, then male (2).
	local title1 "Female"
	local gname1 comp_f
	local title2 "Male"
	local gname2 comp_m

	foreach g in 1 2 {
		* KS test of expected vs. realized earnings within gender.
		ksmirnov expect_real_earn if gender==`g', by(real)
		local ks_p: display %4.3f r(p)

		* Sample sizes shown in the panel note.
		count if !missing(expect_real_earn) & gender==`g' & real==1
		local n_r1 = r(N)
		count if !missing(expect_real_earn) & gender==`g' & real==0
		local n_r0 = r(N)

		twoway (kdensity expect_real_earn if gender==`g' & real==1, bw(6000)) ///
			(kdensity expect_real_earn if gender==`g' & real==0, bw(6000) lp(dash) ), ///
			xtitle("Earnings") ytitle("Density" " ") graphregion(color(white)) ///
			title("`title`g''") name(`gname`g'', replace) ///
			legend(order(1 "2017-2018 Realization" 2 "2018-2019 Expectation") size(medsmall)) ///
			xlabel(0(50000)175000) ///
			note("KS p-val: `ks_p'" "N real: `n_r1'" "N expect: `n_r0'", ring(0) pos(2) size(small)) ///
			nodraw scheme(s2mono)
	}

	* Combine both panels under one shared legend (grc1leg is user-written).
	grc1leg comp_f comp_m, name(figure4, replace) graphregion(color(white)) 
	graph export "${figures}figure4.tif", width(1000) replace
	
******************************************************************************
* Figure 5: Ex-Ante Reservation Earnings, Risk Preferences, and Overoptimism
******************************************************************************

use BU_merge_master_mar2020_v3.dta, clear

* Recode each risk item so that original codes 1 and 2 are pooled, then
* shift down by one so the pooled category becomes 1.
foreach item in daily finance {
	recode trait_risk_`item' (1=2), gen(trait_risk_`item'2)
	replace trait_risk_`item'2 = trait_risk_`item'2 - 1
}

* Average of the two recoded items (rowmean is the current name of rmean).
* NOTE(review): the average is blanked only when the DAILY item is missing;
* a missing finance item leaves risk2 equal to the daily item alone.
egen risk2 = rowmean(trait_risk_daily2 trait_risk_finance2)
replace risk2 = . if trait_risk_daily2==.
sum risk2

* Indicator for an average of at least 5 (missing when risk2 is missing).
cap drop drisk2
gen drisk2 = (risk2>=5) if risk2!=.

*applying CPI to earnings measures 

	* Each survey wave k has its own year variable (cohort_k) and its own set
	* of earnings variables; every variable is rescaled by base_value / value
	* taken from cpi.dta for that year.
	local earn1 expected_basepay_1 expected_bonuspay_1 job_basepay_1 job_bonuspay_1 reservation_wage_1
	local earn2 acceptbase_2 acceptbonuspay_2
	local earn3 acceptbasepay_3 acceptbonuspay_3

	forvalues k = 1/3 {
		* Clear the merge helpers left by the previous wave (not needed for
		* the first wave; the helpers from the last wave stay in the data).
		if `k' > 1 {
			capture drop year period value base_value
		}

		gen year = cohort_`k'
		merge m:1 year using cpi.dta
		drop if _merge == 2
		drop _merge

		foreach var of varlist `earn`k'' {
			replace `var' = `var' * (base_value / value)
		}
	}

	* --- Wave 1: expected and accepted compensation -------------------------
	* Totals = base + bonus (a missing bonus counts as 0 via rowtotal), defined
	* only when base pay is observed.
	egen expected_totalpay_1 = rowtotal(expected_basepay_1 expected_bonuspay_1) if !missing(expected_basepay_1)
	label var expected_totalpay_1 "Expected total compensation"
	label var expected_basepay_1 "Expected base compensation"
	destring expectedjob_hours_1, gen(expected_hrs_1)
	label var expected_hrs_1 "Expected weekly hours"
	
	egen actual_totalpay_1 = rowtotal(job_basepay_1 job_bonuspay_1) if !missing(job_basepay_1)
	label var actual_totalpay_1 "Accepted total compensation"
	gen actual_basepay_1=job_basepay_1
	label var actual_basepay_1 "Accepted base compensation"
	destring job_hoursperweek_1, gen(actual_hrs_1)
	label var actual_hrs_1 "Accepted weekly hours"
	
	* --- Wave 2: accepted compensation --------------------------------------
	egen actual_totalpay_2 = rowtotal(acceptbase_2 acceptbonuspay_2) if !missing(acceptbase_2)
	* Label added so wave 2 matches waves 1 and 3.
	label var actual_totalpay_2 "Accepted total compensation"
	gen actual_basepay_2=acceptbase_2
	label var actual_basepay_2 "Accepted base compensation"
	gen actual_hrs_2=acceptwklyhr_2
	label var actual_hrs_2 "Accepted weekly hours"
	
	* --- Wave 3: accepted compensation --------------------------------------
	egen actual_totalpay_3 = rowtotal(acceptbasepay_3 acceptbonuspay_3) if !missing(acceptbasepay_3)
	label var actual_totalpay_3 "Accepted total compensation"
	gen actual_basepay_3=acceptbasepay_3
	label var actual_basepay_3 "Accepted base compensation"
	gen actual_hrs_3=accepthrs_3
	label var actual_hrs_3 "Accepted weekly hours"
	
	* Accepted total pay: take wave 3, fall back to wave 2, then wave 1;
	* values outside [20000, 175000] are set to missing.
	gen actual_totalpay=actual_totalpay_3
	replace actual_totalpay=actual_totalpay_2 if missing(actual_totalpay)
	replace actual_totalpay=actual_totalpay_1 if missing(actual_totalpay)
	replace actual_totalpay=. if actual_totalpay<20000 | actual_totalpay>175000
	
	* Expected total pay is censored (not dropped) at the same bounds.
	* NOTE(review): expected_totalpay (no suffix) is not created in this section;
	* presumably it already exists in the loaded data, or resolves by variable
	* abbreviation to expected_totalpay_1 -- confirm.
	sum expected_totalpay
	replace expected_totalpay = 20000 if expected_totalpay<20000 & expected_totalpay~=.
	replace expected_totalpay = 175000 if expected_totalpay>175000 & expected_totalpay~=.
	
*Expectation Overconfidence
	* Percent by which expected pay exceeds accepted pay.
	gen oc=(expected_totalpay-actual_totalpay)/actual_totalpay*100 if !missing(expected_totalpay) & !missing(actual_totalpay)
	winsor oc, gen(ocW) p(0.025)		//winsorizing top and bottom 2.5% 
	
	* Alternative version, top-coded at 100 percent.
	gen oc1=oc
	replace oc1=100 if oc1>=100 & !missing(oc1)

** Graphs **

** Using all available data, and ex-ante risk preferences
* Keep respondents whose stated plan is to work right after the degree.
keep if plansafterdegree_1 == "I plan to work for pay right after I receive my Bachelor’s degree" 

* Winsorized reservation wage (top and bottom 2.5%).
cap drop reservation_wage_1W
winsor reservation_wage_1, gen(reservation_wage_1W) p(0.025)
sum reservation_wage_1W

* Shared condition: both reservation wage and expected total pay observed.
local both_obs "reservation_wage_1~=. & expected_totalpay_1~=."

count if (reservation_wage_1<expected_totalpay_1) & `both_obs'
count if (reservation_wage_1>expected_totalpay_1) & `both_obs'

* Flag respondents whose reservation wage exceeds their expected total pay;
* they are excluded (weird~=1) from the analyses below.
sum reservation_wage_1 expected_totalpay_1 if (reservation_wage_1>expected_totalpay_1) & `both_obs'
gen weird = (reservation_wage_1>expected_totalpay_1) & `both_obs'
tab weird

* Log versions of the winsorized and raw reservation wage.
foreach v in reservation_wage_1W reservation_wage_1 {
	gen ln_`v' = ln(`v') 
}

sum reservation_wage_1 reservation_wage_1W if weird~=1

* Exploratory regressions, each run without and then with the additional
* restriction reservation_wage_1>=20000.
foreach extra in "" "& reservation_wage_1>=20000" {
	reg reservation_wage_1W risk2 i.cohort_1 if weird~=1 `extra', robust
}
foreach extra in "" "& reservation_wage_1>=20000" {
	reg ln_reservation_wage_1W drisk2 i.cohort_1 if weird~=1 `extra', robust
}

* Reservation wage in thousands of dollars, for plotting.
gen reservation_wage_1W_000 = reservation_wage_1W/1000

save temp_res_wages, replace	// saving temp file

* ---- Left panel: reservation earnings vs. willingness to take risk ----
reg reservation_wage_1W_000 risk2 if weird~=1 & reservation_wage_1>=20000, robust	
local N = e(N)
local b: display %4.3fc _b[risk2]
* Row 4 of r(table) is the p-value; column 1 is the regressor.
mat table = r(table)
local p table[4,1]
* Significance stars at the 10 / 5 / 1 percent levels (padded to width 3).
local stars "   "
if `p'<0.1 local stars "*  "
if `p'<0.05 local stars "** "
if `p'<0.01 local stars "***"

binscatter reservation_wage_1W_000 risk2 if weird~=1 & reservation_wage_1>=20000, ///
	xtitle("Willingness to Take Risk (Average)", size(medsmall)) ///
	ytitle("Ex-Ante Reservation Earnings" "(1,000$)", size(medsmall)) ///
	xlabel(1(1)6) ylabel(45(5)70) graphregion(color(white)) ///
	text(68 5 "Coef: `b'`stars'", place(e) size(small)) ///
	text(67 5.18  "N: `N'", place(e) size(small)) color(gs2) lcolor(gs8)
graph save reservation_wage_risk, replace

* ---- Right panel: reservation earnings vs. overoptimism ----
reg reservation_wage_1W_000 ocW if weird~=1 & reservation_wage_1>=20000, robust
local N = e(N)
local b: display %4.3fc _b[ocW]
mat table = r(table)
local p table[4,1]
local stars "   "
if `p'<0.1 local stars "*  "
if `p'<0.05 local stars "** "
if `p'<0.01 local stars "***"

binscatter reservation_wage_1W_000 ocW if weird~=1 & reservation_wage_1>=20000, ///
	xtitle("Overoptimism: [(Expect - Realized)/Realized]*100%") ///
	ytitle("Ex-Ante Reservation Earnings" "(1,000$)", size(medsmall)) ///
	xlabel() ylabel(30(10)80) graphregion(color(white)) ///
	text(75 100 "Coef: `b'`stars'", place(e) size(small)) ///
	text(73.5 107  "N: `N'", place(e) size(small)) color(gs2) lcolor(gs8)
graph save reservation_wage_ocW, replace

* ---- Combine both panels, export, and remove the intermediate .gph files ----
graph combine reservation_wage_risk.gph reservation_wage_ocW.gph, graphregion(color(white)) iscale(0.7)
graph export "${figures}figure5.tif", width(1000) replace
foreach g in reservation_wage_risk reservation_wage_ocW {
	erase `g'.gph
}

**********************************************************************
* Figure 6: Timing of Search, Risk Preferences, and Overoptimism
**********************************************************************

* Accepted offers only.
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if accepted==1

* Overoptimism: percent by which expected pay exceeds the accepted offer,
* winsorized at the top and bottom 2.5%.
gen oc = (expected_totalpay-first_total_nt)/first_total_nt*100 if !missing(expected_totalpay) & !missing(first_total_nt)
winsor oc, gen(ocW) p(0.025)

sum startsearch_mo

* Search-timing indicators (missing when the search start month is missing):
* started before graduation, at least 3 months before, at least 6 months before.
gen startsearch_beforegrad = (startsearch_mo<0) if startsearch_mo!=.
foreach k in 3 6 {
	gen startsearch_`k'mbeforegrad = (startsearch_mo<=-`k') if startsearch_mo!=.
}

*Share Starting Search Before Graduation and Risk*

reg startsearch_beforegrad risk2, robust
local b: display %4.3fc _b[risk2]
local N = e(N)
* Row 4 of r(table) is the p-value; column 1 is the regressor.
mat table = r(table)
local p table[4,1]
* Significance stars at the 10 / 5 / 1 percent levels (padded to width 3).
local stars "   "
if `p'<0.1 local stars "*  "
if `p'<0.05 local stars "** "
if `p'<0.01 local stars "***"

binscatter startsearch_beforegrad risk2 if accepted==1, ///
	xtitle("Willingness to Take Risk (Average)", size(medsmall)) ///
	ytitle("Share Starting Search Before Graduation", size(medsmall)) ///
	text(0.87 5 "Coef: `b'`stars'", place(e) size(small)) ///
	text(0.855 5.18  "N: `N'", place(e) size(small)) ///
	graphregion(color(white)) color(gs2) lcolor(gs8)
graph save startsearch_risk_2, replace

** checking that the negative relationship also holds within gender **

reg startsearch_beforegrad risk2, robust
forvalues f = 1(-1)0 {
	reg startsearch_beforegrad risk2 if female==`f', robust
}
binscatter startsearch_beforegrad risk2 if accepted==1, by(female)

*Share Starting Search Before Graduation and Overconfidence*

reg startsearch_beforegrad ocW, robust
local b: display %4.3fc _b[ocW]
local N = e(N)
mat table = r(table)
local p table[4,1]
local stars "   "
if `p'<0.1 local stars "*  "
if `p'<0.05 local stars "** "
if `p'<0.01 local stars "***"

binscatter startsearch_beforegrad ocW if accepted==1, ///
	xtitle("Overoptimism: [(Expect - Realized)/Realized]*100%", size(medsmall)) ///
	ytitle("Share Starting Search Before Graduation", size(medsmall)) ///
	text(0.9 118 "Coef: `b'`stars'", place(e) size(small)) ///
	text(0.875 125  "N: `N'", place(e) size(small)) ///
	graphregion(color(white)) color(gs2) lcolor(gs8)
graph save startsearch_ocW_2, replace

* Combine both panels, export, and remove the intermediate .gph files.
graph combine startsearch_risk_2.gph startsearch_ocW_2.gph, graphregion(color(white))
graph export "${figures}figure6.tif", width(1000) replace
foreach g in startsearch_risk_2 startsearch_ocW_2 {
	erase `g'.gph
}

************************************************************************************
* Figure 7: Job Search Experiment – Mean Reservation Wage in Each Round by Gender
************************************************************************************

** Cleaning the experiment data **

* Keep rows with treat == 3 and a non-missing female indicator.
use jobsearch_experiment, clear
keep if treat == 3 & female != .

** manually cleaning the major and race dummies
replace busecon=0 if asumajor~="" & busecon==.
replace compeng=. if asumajor==""
sum busecon compeng

replace race_white=. if race_asian==.

* Prior on a 0-1 scale, plus a 10-group binned version.
gen expprior = expfast_r0 / 100
egen expprior_np = cut(expprior), group(10)

// standardize risk, prior, and continuous control variables
foreach stub in risk_r expprior time_ce1 time_ce2 {
	egen `stub'_std = std(`stub')
}
label variable risk_r_std "CRRA Coefficient"
label variable expprior_std "Prior of being fast"

gen gpa = asugpa

* US-born indicator from the free-text country field. Only leading blanks
* are trimmed, so spellings with a trailing blank are listed explicitly.
replace country = ltrim(country)

gen us_born = 0 if country~=""
replace us_born = 1 if country=="usa" | country=="us" | country=="united states" ///
	| country=="United States of America" | country== "United States of American" ///
	| country=="United States" | country=="United States " | country=="United Staes" ///
	| country=="United states" | country=="United states " | country=="U.S" ///
	| country=="U.S.A" | country=="US" | country=="USA" | country=="USA " ///
	| country=="U.S.A." | country=="United State of America" | country == "'Murica" ///
	| country == "AMERICA" 

* Parental education: system-missing becomes its own category (99).
gen fa_educ = cond(educdad==., 99, educdad)
gen mo_educ = cond(educmom==., 99, educmom)

* "High" education = code 4 or above; undefined for the missing category.
foreach p in fa mo {
	gen `p'_educ_high = (`p'_educ>=4) if `p'_educ != 99
}

* Missing-indicator versions of the controls: miss_<x> flags missing values
* and <x>_wmiss is <x> with missing values replaced by 0.
foreach x of varlist gpa us_born race_white race_asian compeng busecon fa_educ_high mo_educ_high {
	gen miss_`x' = missing(`x')
	gen `x'_wmiss = cond(missing(`x'), 0, `x')
}

* Indicators for accepting in round k or later (missing if roundaccept is).
forvalues k = 2/4 {
	gen roundaccept_`k'to6 = roundaccept>=`k'
	replace roundaccept_`k'to6 = . if roundaccept==.
}

* Indicators for having accepted by round x; accept_r5 is 1 for everyone.
forvalues x = 1/4 {
	gen accept_r`x' = roundaccept <= `x'
}
gen accept_r5 = 1

* Highest offer across rounds 1-5 (later rounds only replace when observed).
gen wageoffer_max = wageoffer_r1
forvalues x = 2/5 {
	replace wageoffer_max = wageoffer_r`x' if wageoffer_max < wageoffer_r`x' & wageoffer_r`x' < .
}
* Flags an accepted wage below the highest offer.
gen wageoffer_miss = wageaccept < wageoffer_max

* Cap the acceptance round at 5.
* NOTE(review): in Stata a missing value is greater than any number, so a
* missing roundaccept is also set to 5 here -- confirm this is intended.
replace roundaccept = 5 if roundaccept> 5 

rename fast fast_glitch			//this is the indicator for a fast typist with glitch	

* Assign 4 min (the task length) to subjects who ran out of time
replace typingspeed = min(60*4, typingspeed)

gen fast = typingspeed < 80

* The recomputed indicator must agree with the stored one where glitch==0.
assert fast==fast_glitch if glitch==0

tab asuyear, gen(asuyear_)

global controls_nomiss "asuyear_1 asuyear_2 asuyear_3 gpa us_born fa_educ_high mo_educ_high race_white race_asian compeng busecon"
global controls2 "time_ce1_std time_ce2_std"

save temp_expt, replace

** PANEL A **

use temp_expt, clear

* The first five rows of the data are reused as a small results table,
* one row per round.
gen round = _n if _n<=5

* For each gender (f: female==1, m: female==0) and each round, store the mean
* reservation wage and its confidence bounds (rows 5 and 6 of r(table)).
local fem_f 1
local fem_m 0
foreach s in f m {
	gen mean_`s' = .
	gen lower_`s' = .
	gen upper_`s' = .
	forvalues r = 1/5 {
		mean minwage_r`r' if female==`fem_`s''
		replace mean_`s' = e(b)[1,1] if round==`r'
		replace lower_`s' = r(table)[5,1] if round==`r'
		replace upper_`s' = r(table)[6,1] if round==`r'
	}
}

keep round mean_f mean_m lower_* upper_*
drop if round==.

twoway (rcap lower_m upper_m round, lcolor(black)) ///
	(connected mean_m round, sort mcolor(gs2) lcolor(gs2) lwidth(medthick) msymbol(T)) ///
	(rcap lower_f upper_f round, lcolor(black)) ///
	(connected mean_f round, lpattern(dash) sort mcolor(gs8) lcolor(gs8) lwidth(medthick)), ///
	ytitle(Reservation Wage ($)) ytitle(, size(medium)) ///
	ylabel(8(4)28, labsize(medium) format(%9.0f)) ///
	xtitle(Round) xtitle(,size(medium)) xlabel(1(1)5,labsize(medium)) ///
	name(minwage, replace) graphregion(color(white)) ///
	legend(order(2 "Male" 4 "Female") size(medsmall))

graph export "${figures}figure7a.tif", width(1000) replace

** PANEL B **

use temp_expt, clear

* The first five rows of the data are reused as a small results table,
* one row per round (only rounds 1-4 are filled in this panel).
gen round = _n if _n<=5

* Same statistics as Panel A, restricted to subjects with a non-missing
* round-4 reservation wage, for rounds 1 through 4.
local fem_f 1
local fem_m 0
foreach s in f m {
	gen mean_`s'_v1 = .
	gen lower_`s'_v1 = .
	gen upper_`s'_v1 = .
	forvalues r = 1/4 {
		mean minwage_r`r' if female==`fem_`s'' & minwage_r4~=.
		replace mean_`s'_v1 = e(b)[1,1] if round==`r'
		replace lower_`s'_v1 = r(table)[5,1] if round==`r'
		replace upper_`s'_v1 = r(table)[6,1] if round==`r'
	}
}

keep round mean_f_v1 mean_m_v1 lower_*_v1 upper_*_v1
drop if round==.

twoway (rcap lower_m_v1 upper_m_v1 round, lcolor(black)) ///
	(connected mean_m_v1 round, sort mcolor(gs2) lcolor(gs2) lwidth(medthick) msymbol(T)) ///
	(rcap lower_f_v1 upper_f_v1 round, lcolor(black)) ///
	(connected mean_f_v1 round, lpattern(dash) sort mcolor(gs8) lcolor(gs8) lwidth(medthick)), ///
	ytitle(Reservation Wage ($)) ytitle(, size(medium)) ///
	ylabel(12(4)28, labsize(medium) format(%9.0f)) ///
	xtitle(Round) xtitle(,size(medium)) xlabel(1(1)5,labsize(medium)) ///
	name(minwage, replace) graphregion(color(white)) ///
	legend(order(2 "Male" 4 "Female") size(medsmall))

graph export "${figures}figure7b.tif", width(1000) replace

** PANEL C **

use temp_expt, clear

* The first five observations double as plotting rows, one per round
gen round = _n if _n<=5

* Value of `female' that selects each gender suffix
local code_f 1
local code_m 0

* Mean reservation wage and CI bounds by gender and round, restricted to
* participants with a non-missing minwage_r5
foreach g in f m {
	gen mean_`g'_v2 = .
	gen lower_`g'_v2 = .
	gen upper_`g'_v2 = .
	forvalues r = 1/5 {
		mean minwage_r`r' if female==`code_`g'' & minwage_r5~=.
		replace mean_`g'_v2 = e(b)[1,1] if round==`r'
		replace lower_`g'_v2 = r(table)[5,1] if round==`r'
		replace upper_`g'_v2 = r(table)[6,1] if round==`r'
	}
}

keep round mean_f_v2 mean_m_v2 lower_*_v2 upper_*_v2
drop if round==.

* Capped CI bars plus connected means, men (solid) vs women (dashed)
twoway (rcap lower_m_v2 upper_m_v2 round, lcolor(black)) ///
	(connected mean_m_v2 round, sort mcolor(gs2) lcolor(gs2) lwidth(medthick) msymbol(T)) ///
	(rcap lower_f_v2 upper_f_v2 round, lcolor(black)) ///
	(connected mean_f_v2 round, lpattern(dash) sort mcolor(gs8) lcolor(gs8) lwidth(medthick)), ///
	ytitle(Reservation Wage ($)) ytitle(, size(medium)) ///
	ylabel(12(4)28, labsize(medium) format(%9.0f)) ///
	xtitle(Round) xtitle(,size(medium)) xlabel(1(1)5,labsize(medium)) ///
	name(minwage, replace) graphregion(color(white)) ///
	legend(order(2 "Male" 4 "Female") size(medsmall))

graph export "${figures}figure7c.tif", width(1000) replace

****************************************************
* TABLE 1: Sample Characteristics of Graduates
****************************************************

	use BU_grad_analysis_sample_aug2021.dta, clear 
	
	cap file close table
	
	file open table using "${figures}table1.tex", write replace

	* Indicator variables for each major code and each graduation cohort
	forvalues m = 1/11 {
		gen major`m' = (major_code==`m')
	}
	forvalues y = 2013/2019 {
		gen cohort`y' = (cohort==`y')
	}

	*1 race per person
	* Priority order: black > American Indian > Latino > white > Asian/Pacific.
	* Each pass zeroes out every category ranked below the current one.
	local subordinate race_asia_pacif race_white race_latino race_am_india
	foreach dominant in race_black race_am_india race_latino race_white {
		local subordinate : list subordinate - dominant
		foreach r of local subordinate {
			replace `r' = 0 if !missing(`r') & `dominant'==1
		}
	}
	* Respondents flagged as missing race get no race indicator at all
	foreach r in race_asia_pacif race_white race_latino race_am_india race_black {
		replace `r' = . if miss_race==1
	}
	
	*any debt
	gen any_debt = (student_debt!=0) if !missing(student_debt)
	
	* Express debt and parental income in thousands
	foreach amt in student_debt parents_income {
		replace `amt' = `amt'/1000 if !missing(`amt')
	}
	
	*Row labels
	* label1_<var> is written to the first column of the row for <var> and label2_<var> to the
	* second; a variable with no label defined simply gets an empty cell.
	* Several labels below (e.g. trait_risk_*, att_career_*, high_risk_daily, over_conf,
	* student_debt, procrastindex_3) are for variables that do not appear in the variable
	* lists this section loops over, so they are not written to table1.tex.
	local label1_age "Age"
	
	local label1_gpa "GPA"
	
	* Race: the group heading is attached to the first race row (race_white)
	local label1_race_white 			"Race"
	local label2_race_white 			"White/Caucasian"
	local label2_race_black 			"Black/ African American"
	local label2_race_am_india 			"American Indian"
	local label2_race_latino 			"Hispanic/ Latino"
	local label2_race_asia_pacif		"Asian/ Pacific Islander"

	local label1_us_born "Born in U.S."		
	local label1_fa_ba "Father BA+"
	local label1_mo_ba "Mother BA+"

	* Concentration: the group heading is attached to the first concentration row (conc_acc)
	local label1_conc_acc 		"Concentration"
	local label2_conc_acc 		"Accounting"
	local label2_conc_ent 		"Entrepreneurship"
	local label2_conc_fin 		"Finance"
	local label2_conc_gen_mgt	"General Management"
	local label2_conc_int_mgt 	"International Management"
	local label2_conc_law 		"Law"
	local label2_conc_mis 		"Management Info. Systems"
	local label2_conc_mkg 		"Marketing"
	local label2_conc_otm 		"Operations \& Tech. Mgmt."
	local label2_conc_ob 		"Organizational Behavior"
	
	local label1_cohort "Graduation Year"

	local label1_trait_confidence "Perceived Relative Ability (1-5)"
	
	local label1_trait_risk_daily "Risk Tolerance (1-7)"
	local label2_trait_risk_daily "Daily"
	local label2_trait_risk_finance "Financial"
	local label1_risk2 "Risk Tolerance"
	
	local label1_att_career_money "Career Values"
	local label2_att_career_money "Money"
	local label2_att_career_leader "Being a Leader"
	local label2_att_career_enjoy "Enjoying your Work"
	local label2_att_career_helpful "Helping Others"
	local label2_att_career_group "Working with Others"
	
	local label1_high_risk_daily "Percent High Risk ($\geq 6$)"
	local label1_high_risk2 "Percent High Risk ($\geq 5$)"
	local label2_high_risk_daily "Daily"
	local label2_high_risk_finance "Financial"
	
	local label1_over_conf "Percent Overconfident"
	
	* Cohort: the group heading is attached to the first cohort row (cohort2013)
	local label1_cohort2013 "Cohort"
	local label2_cohort2013 "2013"
	local label2_cohort2014 "2014"
	local label2_cohort2015 "2015"
	local label2_cohort2016 "2016"
	local label2_cohort2017 "2017"
	local label2_cohort2018 "2018"
	local label2_cohort2019 "2019"
	
	local label1_worked_after_grad "Ever Accept Job Offer"

	* Dagger/asterisk superscripts refer to the cohort-restriction notes written in the table footer
	local label1_student_debt "Student Debt(1,000s)$^{\dagger}$"
	local label1_any_debt "Proportion With Debt$^{\dagger}$"
	local label1_parents_income "Parent's Income (1,000s)$^{\dagger}$"
	
	local label1_procrastindex_3 "Procrastination Index (sd)$^{*}$"
	local label1_patience_3 "Patience (1-7)$^{*}$"
	local label1_oc1 "Exp-Real (pp)$^{*}$"

	*list of variables to show mean and stdev			
	local continuous_demos age gpa risk2 trait_confidence
	
	*list of variables to show proportion as a percentage of population
	local bin_demos race_white race_black race_am_india race_latino race_asia_pacif us_born fa_ba mo_ba ///
					conc_acc conc_ent conc_fin conc_gen_mgt conc_int_mgt conc_law conc_mis conc_mkg conc_otm conc_ob ///
					worked_after_grad cohort2013 cohort2014 cohort2015 cohort2016 cohort2017 cohort2018 cohort2019 ///
					high_risk2 
					
	//calculate sum stats
	* Both lists go through the same steps (overall, by gender, t-test of the gender difference)
	* on the accepted==1 sample; only the display format and the percentage scaling differ.
	foreach kind in continuous bin {
		local numfmt "%3.2f"
		local pctmult ""
		if "`kind'"=="bin" {
			local numfmt "%3.1f"
			local pctmult "*100"
		}
		foreach var of varlist ``kind'_demos' {
			summarize `var' if accepted==1
			local `var'_mean_overall: display `numfmt' `r(mean)'`pctmult'
			local `var'_sd_overall: display `numfmt' `r(sd)'`pctmult'
			local `var'_n: display r(N)
			forvalues g = 1/2 {
				summarize `var' if gender==`g' & accepted==1
				local `var'_mean_`g': display `numfmt' `r(mean)'`pctmult'
				local `var'_sd_`g': display `numfmt' `r(sd)'`pctmult'
			}
			ttest `var' if accepted==1, by(gender)
			local `var'_p: display %4.3f `r(p)'
		}
	}

	* Sample sizes for the "Observations" row (gender==2 feeds the Men column, gender==1 the Women column)
	count if gender==2 & accepted==1
	local m_N `r(N)'
	count if gender==1 & accepted==1
	local f_N `r(N)'
	count if !mi(gender) & accepted==1
	local all_N `r(N)'

	* Write table1.tex through the file handle "table" opened at the top of this section.
	* `fwt' is shorthand for the write command.
	local fwt "file write table"
	`fwt' "\begin{table}[H]\caption{Sample Characteristics } \centering \footnotesize \begin{threeparttable} \begin{tabular}{rlcccc} \toprule" _n
	
	* Column order is All, Men, Women; men are gender==2 and women gender==1
	`fwt' " && All & Men & Women & p-value \\ \hline" _n
	`fwt' " Observations && `all_N' & `m_N' & `f_N' & \\" _n

	* Indicator variables: one row each, shown as percentages
	foreach var of varlist `bin_demos' {
		`fwt' "`label1_`var'' & `label2_`var'' & ``var'_mean_overall'\% & ``var'_mean_2'\% & ``var'_mean_1'\% & ``var'_p'  \\" _n
	}

	* Continuous variables: a row of means followed by a row of standard deviations in parentheses
	foreach var of varlist `continuous_demos' {
		`fwt' "`label1_`var'' & `label2_`var''& ``var'_mean_overall' & ``var'_mean_2' & ``var'_mean_1' & ``var'_p'  \\" _n
		`fwt' "  &  & (``var'_sd_overall') & (``var'_sd_2') & (``var'_sd_1') & \\" _n
	} 
	
	*sample size for 2018/2019 vars
	sum student_debt if accepted==1
	local N_debt=r(N)
	sum parents_income  if accepted==1
	local N_income=r(N)
	
	* Close the tabular and emit the table notes
	`fwt' "\bottomrule" _n
	`fwt' "\end{tabular}"_n
	`fwt' "\begin{tablenotes} \item[] \footnotesize"_n
	`fwt' "$^{\dagger}:$ 2019 cohort only.\\" _n
	`fwt' "$^{*}:$ 2018/2019 cohorts only.\\" _n
	`fwt' "Sample size debt: `N_debt'. Sample size parent income: `N_income'. \\" _n
	`fwt' "\emph{Notes:} Percent Overconfidence defined as the residual from the regression of subjective ability on gpa, major, and cohort." _n
	`fwt' "\end{tablenotes} \end{threeparttable} \end{table}" _n
	file close table

********************************************************************************
* Table 2: Summary Statistics: Initial Job Characteristics and Search Behavior
********************************************************************************
	
	use "BU_grad_analysis_sample_aug2021.dta", clear 
	
	set more off
	cap file close table
	file open table using "${figures}table2.tex", write replace
	
	*Current Industry
	* Indicators by current industry code; "other" is defined only when the code is non-missing
	gen cur_ind_accounting = (current_indust_code==3)
	gen cur_ind_marketing  = (current_indust_code==4)
	gen cur_ind_consult    = (current_indust_code==15)
	gen cur_ind_retail     = (current_indust_code==16)
	gen cur_ind_media      = inlist(current_indust_code,22,23)
	gen cur_ind_finance    = inlist(current_indust_code,27,29)
	gen cur_ind_educ       = (current_indust_code==31)
	gen cur_ind_health     = (current_indust_code==32)
	gen cur_ind_other      = !inlist(current_indust_code,3,4,15,16,22,23,27,29,31,32) if !missing(current_indust_code)
	  
	*First Industry
	* Parallel lists: the j-th name is paired with the j-th first_industry code
	local ind_names accounting marketing consult retail media finance educ health
	local ind_codes 1 2 3 16 11 4 5 6
	forvalues j = 1/8 {
		local nm : word `j' of `ind_names'
		local cd : word `j' of `ind_codes'
		gen first_ind_`nm' = (first_industry==`cd')
	}
	gen first_ind_other = !inlist(first_industry,1,2,3,16,11,4,5,6) if !missing(first_industry)
	
	*Row labels
	* label1_<var> is the row label written to table2.tex for <var>. Only variables that
	* appear in the variable lists defined further down in this section produce rows, so
	* several labels here (e.g. us_living, married, emp_parttime) are not written.
	local label1_us_living "Currently Living in U.S."
	local label1_first_job_us "First Job in U.S."
	local label1_married "Married"
	local label1_in_relation "In Relationship"
	local label1_worked_after_grad "Ever Work After BU"
	local label1_worked_after17 "Ever Work After BU (Exclude 17-18)"
	local label1_self_emp "Self employed after school (18 only)"
	local label1_current_educ "Currently In School"
	local label1_emp_fulltime "Currently Employed Full-Time"
	local label1_emp_parttime "Currently Employed Part-Time"
	local label1_curr_self_emp "Currently Self-Employed"
	local label1_emp_unemp "Currently Unemployed"
	
	* Earnings rows
	local label1_first_total_nt "First Year Total Pay"
	local label2_first_total_nt "Total Pay"
	
	local label2_resid_first_total_nt "Residualized Total Pay"
	
	local label1_current_total "Current Job Total Pay"
	local label2_current_total "Total Pay"
	
	* First-job industry rows
	local label1_first_ind_accounting "Accounting"
	local label1_first_ind_marketing "Advertising/Marketing"
	local label1_first_ind_consult "Consulting Services"
	local label1_first_ind_retail "Cons. Products/Retail"
	local label1_first_ind_media "Entertainment Media"
	local label1_first_ind_finance "Financial Services"
	local label1_first_ind_educ "Government/Education"
	local label1_first_ind_health "Health"
	local label1_first_ind_other "Other"
	
	* Derived search and offer variables. The conditional versions are set to missing
	* outside the stated subsample so later means are computed on that subsample only.
	gen intern_first_bin = intern_yn
	label var intern_first_bin "Interned for first job"
	gen reject_any_overconf = rejected_any if over_conf==1 & accepted==1
	label var reject_any_overconf "Rejected any offer $|$ Overconfident (3rd tercile)"
	gen first_base_nf_fulltime = first_base_nf if emp_fulltime==1 & accepted==1
	label var first_base_nf_fulltime "Offered first job base pay $|$ full-time"
	gen first_base_nt_fulltime = first_base_nt if emp_fulltime==1 & accepted==1	
	* Fix: this label used to be attached to first_base_nf_fulltime, which overwrote the
	* "Offered" label above and left first_base_nt_fulltime without a label.
	label var first_base_nt_fulltime "Negotiated first job base pay $|$ full-time"	 
	gen first_total_nt_fulltime = first_total_nt if emp_fulltime==1 & accepted==1
	label var first_total_nt_fulltime "Negotiated first job total pay $|$ full-time"
	gen reject_higher_overconf = reject_higher if over_conf==1 & accepted==1
	label var reject_higher_overconf "Rejected any higher offer $|$ Overconfident (3rd tercile)"
	* Months elapsed between the first offer and the accepted offer
	gen gap_first_offer_accept = accept_mo-first_offer_mo
	label var gap_first_offer_accept "Months between first offer and acceptance"
	gen accept_first_offer18 = accept_first_offer if cohort>=2018
	label var accept_first_offer18 "Accepted first offer if cohort 2018/2019"
	* accept_mo<=0 is labelled "Proportion Accept Before Grad" in this table
	gen prop_accept_grad = accept_mo<=0 if !missing(accept_mo)
	
	* Row labels for the search-behavior variables
	local label1_rejected_any "Rejected Any Offer"
	local label1_within_6mo "Accept Job Within 6 Months of Graduation"
	local label1_intern_first_bin "Interned for First Job"
	local label1_prop_accept_grad "Proportion Accept Before Grad"
	local label1_num_offer "Number of offers"
	local label1_first_offer_mo "Month of First Offer"
	local label1_offer_length_consider "Time Given to Consider (wks.)"
	local label1_referral_helped "Referral Helped" 
	local label1_accept_mo "Month Accept Offer"
	
	*list of variables to show mean and stdev
	* money_vars are formatted as whole numbers with thousands separators (%6.0fc)
	local money_vars first_total_nt current_total 
	
	* cont_vars are formatted with two decimals (%3.2f)
	local cont_vars num_offer accept_mo offer_length_consider 
	
	*list of variables to show proportion as a percentage of population
	local bin_vars 	first_job_us worked_after_grad ///
					emp_fulltime first_ind_accounting ///
					first_ind_marketing first_ind_consult first_ind_retail first_ind_media first_ind_finance ///
					first_ind_educ first_ind_health first_ind_other ///
					rejected_any intern_first_bin referral_helped prop_accept_grad within_6mo


	*calculate sum stats for earnings, continuous and binary vars
	* All three lists go through the same steps on the accepted==1 sample (overall, by gender,
	* t-test of the gender difference); only the display format and percentage scaling differ.
	foreach kind in money cont bin {
		local numfmt "%3.2f"
		local pctmult ""
		if "`kind'"=="money" {
			local numfmt "%6.0fc"
		}
		if "`kind'"=="bin" {
			local numfmt "%3.1f"
			local pctmult "*100"
		}
		foreach var of varlist ``kind'_vars' {
			summarize `var' if accepted==1
			local `var'_mean_all: display `numfmt' `r(mean)'`pctmult'
			local `var'_sd_all: display `numfmt' `r(sd)'`pctmult'
			forvalues g = 1/2 {
				summarize `var' if gender==`g' & accepted==1
				local `var'_mean_`g': display `numfmt' `r(mean)'`pctmult'
				local `var'_sd_`g': display `numfmt' `r(sd)'`pctmult'
			}
			ttest `var' if accepted==1, by(gender)
			local `var'_p: display %4.3f `r(p)'
		}
	}

	* Sample sizes for the "Observations" row (gender==2 feeds the Men column, gender==1 the Women column)
	count if gender==2 & accepted==1
	local m_N `r(N)'
	count if gender==1 & accepted==1
	local f_N `r(N)'
	count if !mi(gender) & accepted==1
	local all_N `r(N)'

	* Write table2.tex through the file handle "table" opened at the top of this section.
	* `fwt' is shorthand for the write command.
	local fwt "file write table"
	`fwt' "\begin{table}[H]\caption{Current Characteristics } \centering \begin{threeparttable} \begin{tabular}{rlcccc} \toprule" _n
	
	* Column order is All, Men, Women; men are gender==2 and women gender==1
	`fwt' " & All & Men & Women & p-value \\ \hline" _n
	`fwt' " Observations & `all_N' & `m_N' & `f_N' & \\" _n

	* Indicator variables: one row each, shown as percentages
	foreach var of varlist `bin_vars' {
		`fwt' "`label1_`var'' & ``var'_mean_all'\% & ``var'_mean_2'\% & ``var'_mean_1'\% & ``var'_p'  \\" _n
	}

	* Earnings variables: a row of means followed by a row of standard deviations in parentheses
	foreach var of varlist `money_vars' {
		`fwt' "`label1_`var''& \\$``var'_mean_all' & \\$``var'_mean_2' & \\$``var'_mean_1' & ``var'_p'  \\" _n
		`fwt' " & (``var'_sd_all') & (``var'_sd_2') & (``var'_sd_1') & \\" _n
	}
	* Other continuous variables: same two-row layout
	foreach var of varlist `cont_vars' {
		`fwt' "`label1_`var''& ``var'_mean_all' & ``var'_mean_2' &``var'_mean_1' & ``var'_p'  \\" _n
		`fwt' " & (``var'_sd_all') & (``var'_sd_2') & (``var'_sd_1') & \\" _n
	}
	* NOTE(review): dist_vars is not defined anywhere in this section; unless it is set earlier
	* in the do-file this loop runs zero times and writes nothing -- confirm whether it can go.
	foreach var in `dist_vars' {
		`fwt' "& `label1_`var''& ``var'_all' & ``var'_2' & ``var'_1' & \\" _n
	}
	* Only the tabular is closed here; the table/threeparttable environments opened above are
	* not closed in this file.
	`fwt' "\bottomrule" _n
	`fwt' "\end{tabular}"_n
	file close table
			
***************************************************************
*	 Appending Search Behavior (Cohort 2018/19) to Table 2    * 
***************************************************************
	
	cap file close table
	
	set more off
	use postg_analysis.dta, clear
	
	* Analysis sample: accepted a job, excluding jobsearch_3==2
	keep if accepted==1
	drop if jobsearch_3==2
	
	* Offers per application
	gen yield=number_offers/numapplications_total_3
	
	* Log-only descriptives by gender (women first, then men)
	forvalues f = 1(-1)0 {
		sum yield if female==`f'
	}
	forvalues f = 1(-1)0 {
		sum number_offers numapplications_total_3 if female==`f' & yield~=.
	}
	
	gen yield_v2=(number_offers/numapplications_total_3)*100 /* offers per 100 applications */
	
	* Overconfidence flag: wave-3 value, falling back to wave 1 when wave 3 is missing
	gen over_conf = cond(missing(over_conf_3), over_conf_1, over_conf_3)
	
	* Internship indicator: wave-3 string answer first, then the wave-2 numeric code
	gen intern = (acceptintern_3!="No") if !missing(acceptintern_3)
	replace intern = (acceptintern_2!=4) if missing(intern) & !missing(acceptintern_2)
	
	* Convert the free-text "time to accept" answer to a number of weeks.
	* Suffixes are stripped longest-first so " weeks or more" is removed before " weeks"/" week".
	replace accepttimetoaccept_3="1" if accepttimetoaccept_3=="less than a week"
	foreach suffix in " weeks or more" " weeks" " week" {
		replace accepttimetoaccept_3=subinstr(accepttimetoaccept_3,"`suffix'","",.)
	}
	destring accepttimetoaccept_3, gen(offer_length_consider)
	
	* Rejection and offer-count measures
	gen reject_h_risk2=rejected_any if risk2>=5 & !missing(risk2)
	gen reject_any_overconf=rejected_any if over_conf==1
	gen two_or_more = (number_offers>=2) if !missing(number_offers)
	gen three_or_more = (number_offers>=3) if !missing(number_offers)
	gen intern_first_bin=intern if accepted==1
	gen num_offer=number_offers if accepted==1
	gen share_rejected = (number_offers-1)/number_offers if accepted==1
//	replace share_rejected=1 if accepted!=1 & !missing(number_offers)	
	
	* Did the respondent reject any offer paying less / more than the accepted one?
	foreach side in lower higher {
		gen rejected_any_`side' = 0 if !missing(rejected_any)
	}
	forvalues k = 1/3 {
		replace rejected_any_lower=1 if !missing(rejected_any) & !missing(offer`k'_totalpay) & offer`k'_totalpay<actual_offer_total
		replace rejected_any_higher=1 if !missing(rejected_any) & !missing(offer`k'_totalpay) & offer`k'_totalpay>actual_offer_total
	}
	gen reject_higher_h_risk2=rejected_any_higher if risk2>=5 & !missing(risk2)

	* Earliest and latest offer month across the accepted offer and up to three other offers
	egen first_offer_mo=rowmin(offer_mo offer1_receive_mo offer2_receive_mo offer3_receive_mo)
	egen last_offer_mo=rowmax(offer_mo offer1_receive_mo offer2_receive_mo offer3_receive_mo)  if num_offer>1
	
	*find time between first and last/ first and second offers
	* second_offer_mo is the month of the second-earliest offer:
	*   - exactly two offers: it is the last offer month;
	*   - more than two: the smallest offer month strictly after the first, unless two
	*     offers share the earliest month, in which case it equals the first month.
	* temp flags, per respondent, that an offer at the earliest month was already seen.
	*
	* Fix: the tie handling and the temp flag used the programming command
	* "if exp { ... }", which evaluates exp for the first observation only and then runs an
	* unconditional replace on every row. They are now per-observation "if" qualifiers.
	gen temp=0
	gen second_offer_mo=last_offer_mo if num_offer==2
	foreach var of varlist offer_mo offer1_receive_mo offer2_receive_mo offer3_receive_mo{
		replace second_offer_mo=`var' if num_offer>2 & `var'<second_offer_mo & `var'>first_offer_mo & !missing(`var')
		* a second offer at the earliest month: the second offer coincides with the first
		replace second_offer_mo=`var' if num_offer>2 & `var'==first_offer_mo & temp==1 & !missing(`var')
		* temp trips if already found a var at the lowest month
		replace temp=1 if `var'==first_offer_mo & !missing(`var')
	}
	drop temp 
	gen gap_f_l=last_offer_mo-first_offer_mo
	gen gap_f_s=second_offer_mo-first_offer_mo
	
	* Negotiation-efficacy response, keeping only values below 20
	cap drop neg_eff
	gen neg_eff=acceptnegeff_off_3 if acceptnegeff_off_3<20 & !missing(acceptnegeff_off_3)
	
	* Two overconfidence flags: index 0 = over_conf ("Resid"), index 1 = oc1>=0 ("OC1")
	gen over_oc1 = (oc1>=0) if !missing(oc1)
	local var0 over_conf
	local var1 over_oc1
	local lab0 "Resid"
	local lab1 "OC1"
	* Source variable behind each "reject any" / "reject higher" outcome
	local src_any rejected_any
	local src_higher rejected_any_higher
	forvalues i = 0/1 {
		foreach what in any higher {
			* hoc = flag equals 1 (overconfident), loc = flag equals 0
			gen reject_`what'_if_hoc`i' = `src_`what'' if `var`i''==1 & !missing(`var`i'')
			gen reject_`what'_if_loc`i' = `src_`what'' if `var`i''==0 & !missing(`var`i'')
			local label_reject_`what'_if_hoc`i' "Reject `what' $|$ Overconf (`lab`i'')"
			local label_reject_`what'_if_loc`i' "Reject `what' $|$ Underconf (`lab`i'')"	
		}
	}
	
	* Length of the active search period, in months
	gen search_duration=activeend_mo_3-activestart_mo_3
	
	* Rejected at least one offer paying at least the baseline expected total pay
	gen reject_higher_expect=0 if !missing(postg_dummy) & !missing(expected_totalpay) & accepted==1
	forvalues k = 1/3 {
		replace reject_higher_expect=1 if accepted==1 & !missing(expected_totalpay) & !missing(offer`k'_totalpay) & offer`k'_totalpay>=expected_totalpay
	}
	gen reject_higher_expect_oc0=reject_higher_expect if over_conf==1
	
	* Same difference as search_duration, kept under a second name
	gen gap_active=activeend_mo_3-activestart_mo_3
	
	* No-regret indicators, defined only when both underlying items are answered
	gen no_searchtime_reg = (regret_startlate_3==0 & regret_applate_3==0) if !missing(regret_startlate_3) & !missing(regret_applate_3)
	gen no_accepttime_reg = (regret_accept_early_3==0 & regret_accept_late_3==0) if !missing(regret_accept_late_3) & !missing(regret_accept_early_3)
 
	* Numeric version of the career-center usefulness string variable
	encode usefulcareerc, gen(useful_career_center) 
	
 	*list of selected variables
	* `vars' drives the statistics loop further down; `cont_vars' (same contents) drives the rows written to file
	local vars activestart_mo_3 numapplications_total_3 yield_v2 searchweeklyhrs_3 useful_career_center propunderqual_3
	
	*continuous variables
	local cont_vars activestart_mo_3 numapplications_total_3 yield_v2 searchweeklyhrs_3 useful_career_center propunderqual_3

	* NOTE(review): large_vars is not referenced again in this section -- confirm whether it is still needed
	local large_vars actual_totalpay
				
	*Make local variable labels
	* label_<var> is the row label for <var>; only the variables in `cont_vars' are written,
	* so most of the labels below are not used by this section.
	local label_rejected_any_offer_before "Rejected Any Offer Before Accepting"
	local label_rejected_any "Rejected Any Offer"
	local label_rejected_any_lower "Rejected Any Offer Lower than Accepted"
	local label_rejected_any_higher "Rejected Any Offer Higher than Accepted"
	local label_reject_h_risk2 "Rejected Any Offer $|$ Risk Score $\ge$ 5"
	local label_reject_higher_h_risk2 "Rejected Higher Offer $|$ Risk Score $\ge$ 5"
	local label_num_offer "Number of Offers"
	local label_share_rejected "Share of Offers Rejected"
	local label_two_or_more "Proportion with $\ge 2$ offers"
	local label_three_or_more "Proportion with $\ge 3$ offers"
	local label_within_6mo "Accept job within 6 Months of Graduation"
	local label_intern_first_bin "Interned for First Job"
	local label_offer_length_consider "First job: time given to consider accepted offer (wks.)"
	local label_reject_any_overconf "Rejected any offer $|$ overconfident (3rd tercile)"
	local label_startsearch_mo "Month start job search"
	local label_numapplications_1mo_3 "Number of apps. 1mo."
	local label_numapplications_2mo_3 "Number of apps. 2mo."
	local label_propoverqual_3 "Proportion of Jobs Overqualified"
	local label_propunderqual_3 "Proportion of Jobs Underqualified"
	local label_activestart_mo_3 "Month Start Active Job Search"
	local label_activeend_mo_3 "Month End Active Searching"
	local label_searchweeklyhrs_3 "Hours Spent Searching Per Week"
	local label_searchweeklyapps_3 "Apps. per week"
	local label_search_regrets_3 "Any Search Timing Regrets"
	local label_regret_accept_early_3 "Regret Accepting too Early"
	local label_regret_accept_late_3 "Regret Accepting too Late"
	local label_regret_applate_3 "Regret Applying Late to Certain Jobs"
	local label_regret_startlate_3 "Regret Not Starting Search Sooner"
	local label_no_searchtime_reg "No Regrets about Search Timing"
	local label_no_accepttime_reg "No Regrets about Accept Timing"
	local label_searchsatisfication_3 "Search Satisfaction"
	local label_neg_eff "Negotiation efficiacy"
	local label_actual_totalpay "First job: accepted total comp."
	local label_first_offer_mo "Mo. of first offer"
	local label_gap_f_l "Gap: 1st and last Offer"
	local label_gap_f_s "Gap: 1st and 2nd Offer"
	local label_actual_offer_total "First job: offered total comp."
	local label_reject_higher_expect "Reject offer above baseline expect"
	local label_reject_higher_expect_oc0 "Reject offer above baseline expect $|$ overconf."
	local label_search_duration "Search Duration"	
	local label_numapplications_total_3 "Total Number of Applications"
	local label_gap_active "Months Active Search"
	local label_yield "Yield Rate (Offers Per App.)"
	local label_yield_v2 "Offers Per 100 Applications"
	local label_useful_career_center "Usefulness of Career Center in Search (1-5)" 
	local label_accept_mo "Month Accept Offer"
	
	* Observation counts (respondents with a non-missing active-search start month)
	forvalues g = 1/2 {
		count if accepted==1 & gender==`g' & activestart_mo_3~=.
		local count_`g' `r(N)'
	}
	
	count if accepted==1 & activestart_mo_3~=.
	local count_all `r(N)'
	
	* Suffix used in the stored macro names for each summarize result
	local tag_mean m
	local tag_p50 med
	local tag_sd sd
	
	foreach var in `vars' {
		* Overall mean / median / sd
		quietly summarize `var' if accepted==1, detail
		foreach s in mean p50 sd {
			local `var'_`tag_`s''_all: display %8.2fc `r(`s')'
		}
		* Gender difference: t-test p-value
		ttest `var' if accepted==1, by(gender) //ttest all and then overwrite if chi2 test
		local `var'_p: display %4.3f `r(p)'
		* Same statistics by gender, plus the non-missing count
		forvalues g = 1/2 {
			quietly summarize `var' if accepted==1 & gender==`g', detail
			foreach s in mean p50 sd {
				local `var'_`tag_`s''_`g': display %8.2fc `r(`s')'
			}
			count if !mi(`var') & gender==`g' &  accepted==1
			local `var'_N_`g' `r(N)'
		}
	}
	
	*----------Write output---------------------------------------------------------------------------------
	* Writes the search-behavior rows (2018/2019 cohorts) to a separate file, table2_append.tex.
	* `fwt' is shorthand for the write command.
	file open table using "${figures}table2_append.tex", write replace
	local fwt "file write table"
	
	`fwt' "\begin{table}[h]\caption{Offer Characteristics} \centering \begin{threeparttable} \resizebox{0.5\textwidth}{!}{ \begin{tabular}{lccccc} \toprule" _n
	
	* Column order is All, Men, Women; men are gender==2 and women gender==1
	`fwt' " & All & Men & Women & p-value \\ \hline" _n
	`fwt' "Search Behavior (2018/2019 cohorts only) \\ " 
	`fwt' " Observations & `count_all' & `count_2' & `count_1' & \\" _n
	
	`fwt' "\pagebreak"_n
	* One row of means and one row of standard deviations (in parentheses) per variable
	foreach var of varlist `cont_vars' {
		`fwt' " `label_`var'' & ``var'_m_all' & ``var'_m_2' &  ``var'_m_1' & ``var'_p' \\" _n
		`fwt' " &(``var'_sd_all') &(``var'_sd_2') & (``var'_sd_1') \\" _n
	}
	
	#delimit ;
	local footnote "";
	
	#delimit cr

	* The footnote text is empty, so the notes block below contains only the "Notes:" stub
	`fwt' "\bottomrule" _n
	`fwt' "\end{tabular}}"_n
	`fwt' "\begin{tablenotes} \item[] \footnotesize"_n
	`fwt' "\emph{Notes:} `footnote'"
	`fwt' "\end{tablenotes} \end{threeparttable} \end{table}" _n
		
	file close table	
	
***************************************************************
* Table 3: Gender Differences in the Timing of Job Acceptance
***************************************************************
	
	use BU_grad_analysis_sample_aug2021.dta, clear 
	* Respondents who accepted a job but have no first-industry code are assigned code 13
	replace first_industry=13 if accepted==1 & missing(first_industry)
	
	cap file close table
	file open table using "${figures}table3.tex", write replace
	
	* Control sets: 0 = none, 1 = basic, 2 = basic + industry, 3 = 2 + search start month
	local controls0 ""
	local controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
	local controls2 "i.first_industry i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ" 
	local controls3 "i.first_industry i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ startsearch_mo" 

	* Survival setup: time is the acceptance month shifted by origin -16, failure is within_6mo
	stset accept_mo if accepted==1, failure(within_6mo) origin(t -16)
	
	forvalues i = 0/2 {
		* --- Cox model; the female term is column 1 of r(table) ---
		stcox female `controls`i'' if accepted==1, vce(robust)
		mat table=r(table)
		local p table[4,1]
		* significance stars: *** p<0.01, ** p<0.05, * p<0.1
		local asterisks = cond(`p' < 0.01, "***", cond(`p' < 0.05, "**", cond(`p' < 0.1, "*", "")))
		local haz_fem_b`i': display %4.3f table[1,1]
		local haz_fem_se`i': display %4.3f table[2,1]
		local haz_N`i': display %5.0f e(N)
		local haz_fem_b`i' `haz_fem_b`i''`asterisks'
		
		* --- OLS of the acceptance month on the same controls ---
		reg accept_mo female `controls`i'' if accepted==1, robust
		mat table=r(table)
		local p table[4,1]
		local asterisks = cond(`p' < 0.01, "***", cond(`p' < 0.05, "**", cond(`p' < 0.1, "*", "")))
		local lin_fem_b`i': display %4.3f _b[female]
		local lin_fem_se`i': display %4.3f _se[female]
		local lin_N`i': display %5.0f e(N)
		local lin_r2`i': display %5.3f e(r2)
		local lin_fem_b`i' `lin_fem_b`i''`asterisks'
	
	}
	
	* Dependent-variable means shown in the "Mean" row (accepted==1 sample)
	quietly sum accept_mo if accepted==1
	local lin_mean: display %4.3f r(mean)
	
	quietly sum within_6mo if accepted==1
	local haz_mean: display %4.3f r(mean)
	
	* Write table3.tex: columns 1-3 are the hazard models and columns 4-6 the OLS models,
	* each ordered by control set 0, 1, 2.
	local fwt "file write table"
	`fwt' "\begin{table}[H] \caption{Gender Differences in the Timing of Job Acceptance}\centering \begin{tabular}{l*{10}{c}}"_n
	`fwt' "  & & &  \\" 
	`fwt' "\hline \hline" _n
	`fwt' "  &  \multicolumn{3}{c}{Hazard (within 6mo.)} & & \multicolumn{3}{c}{OLS} \\ \cline{2-4} \cline{6-8}" _n
	`fwt' "  & & &  \\" _n
	`fwt' " Female & `haz_fem_b0' & `haz_fem_b1' & `haz_fem_b2'  &   & `lin_fem_b0' & `lin_fem_b1' & `lin_fem_b2'  \\" _n
	`fwt' "        & (`haz_fem_se0') & (`haz_fem_se1') & (`haz_fem_se2') &   & (`lin_fem_se0') & (`lin_fem_se1') & (`lin_fem_se2')  \\" _n
	`fwt' "  & & &  \\" 
	`fwt' " Basic controls & N & Y & Y  & & N & Y & Y \\" _n
	`fwt' " Industry & N & N & Y  & & N & N & Y  \\ " _n
	`fwt' " Mean & `haz_mean' & `haz_mean' & `haz_mean'  & & `lin_mean' & `lin_mean' & `lin_mean'  \\" _n
	`fwt' " R2 & &  & & &  `lin_r20' & `lin_r21'& `lin_r22'  \\" _n
	`fwt' " N & `haz_N0' & `haz_N1' &`haz_N2'  & & `lin_N0' & `lin_N1' & `lin_N2'   \\ \hline \hline" _n
	
	#delimit ;
	local footnote "
	Basic controls include cohort fixed effects, major fixed effects, GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. Industry controls include 
	fixed effects for 19 industry groups. Robust standard errors reported in parentheses. ***significant at the 1\% level, **5\% level, *10\% level." ;
	#delimit cr

	* Footnote spans the full table width, then the environments are closed
	`fwt' "\multicolumn{10}{p{14.5cm}}{\footnotesize Note: `footnote'}" _n
	`fwt' "\end{tabular}\end{table}" _n
	file close table
	
*******************************************************************************************
* Table 4: Relationship Between Cumulative Gender Earnings Gap and Month Since Graduation
*******************************************************************************************

* Load the analysis sample; keep accepted offers with acceptance month within +/-15 of graduation.
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if accepted==1
keep if abs(accept_mo)<=15
	
gen male = 1-female
* Observations with a missing industry are assigned code 13 so that i.first_industry does not drop them.
replace first_industry=13 if accepted==1 & missing(first_industry)

* Parental-leave indicator: 1 if either benefit is offered, defined only when BOTH items are observed.
* (The original condition tested first_benefit_maternity twice and never checked paternity.)
gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)
	
* Zero-fill expected earnings growth and keep a missing-value flag to include alongside it.
foreach var of varlist exp_earn_growth_1yr{
	gen `var'_miss=missing(`var')
	replace `var'=0 if missing(`var')
	}

** Residualizing **

* Three nested control sets: basic, + industry, + job benefits and expected earnings growth.
global controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
global controls2 "$controls1 i.first_industry"
global controls3 "$controls2 i.first_benefit_flexwork i.first_benefit_maternity i.first_benefit_paternity i.first_benefit_sickleave i.first_benefit_childcare exp_earn_growth_1yr exp_earn_growth_1yr_miss"

* controls *

* Remove residuals left over from an earlier run.
capture drop *_res1 
capture drop *_res2 
capture drop *_res3

set more off
* For each control set k, regress earnings (level, then log) on the controls and keep the residuals
* as <outcome>_res<k>.
forvalues k = 1/3 {
	foreach outcome in first_total_nt log_first_total_nt {
		regress `outcome' ${controls`k'}
		predict `outcome'_res`k', residuals
	}
}

set more off 
capture program drop gap_raw
program define gap_raw, rclass
	// Cumulative raw male-female earnings gap by acceptance month.
	// For each cutoff m in -9..9 the gap among offers accepted by month m is stored in row m+16,
	// then the stored gaps are regressed on month.
	// Returns r(slope_mo) (levels) and r(log_slope_mo) (logs).
	capture drop cum_gap cum_gap_se
	capture drop log_cum_gap log_cum_gap_se
	foreach slot in cum_gap cum_gap_se log_cum_gap log_cum_gap_se {
		generate `slot' = .
	}
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	// Levels first, then logs.
	foreach outcome in first_total_nt log_first_total_nt {
		local target = subinstr("`outcome'", "first_total_nt", "cum_gap", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			regress `outcome' male if accepted==1 & accept_mo<=`cutoff'
			replace `target' = _b[male] in `row'
			replace `target'_se = _se[male] in `row'
		}
	}

	regress cum_gap mo
	return scalar slope_mo = _b[mo]

	regress log_cum_gap mo
	return scalar log_slope_mo = _b[mo]
end

set more off 
capture program drop gap_res1
program define gap_res1, rclass
	// Same construction as the raw gap, applied to earnings residualized on control set 1.
	// Returns r(slope_mo_res1) (levels) and r(log_slope_mo_res1) (logs).
	capture drop cum_gap_res1 cum_gap_res1_se
	capture drop log_cum_gap_res1 log_cum_gap_res1_se
	foreach slot in cum_gap_res1 cum_gap_res1_se log_cum_gap_res1 log_cum_gap_res1_se {
		generate `slot' = .
	}
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	// Row m+16 holds the male coefficient among offers accepted by month m (m = -9..9).
	foreach outcome in first_total_nt_res1 log_first_total_nt_res1 {
		local target = subinstr("`outcome'", "first_total_nt", "cum_gap", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			regress `outcome' male if accepted==1 & accept_mo<=`cutoff'
			replace `target' = _b[male] in `row'
			replace `target'_se = _se[male] in `row'
		}
	}

	regress cum_gap_res1 mo
	return scalar slope_mo_res1 = _b[mo]

	regress log_cum_gap_res1 mo
	return scalar log_slope_mo_res1 = _b[mo]
end

capture program drop gap_res2
program define gap_res2, rclass
	// Cumulative gender gap in earnings residualized on control set 2.
	// Returns r(slope_mo_res2) (levels) and r(log_slope_mo_res2) (logs).
	capture drop cum_gap_res2 cum_gap_res2_se
	capture drop log_cum_gap_res2 log_cum_gap_res2_se
	foreach slot in cum_gap_res2 cum_gap_res2_se log_cum_gap_res2 log_cum_gap_res2_se {
		generate `slot' = .
	}
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	// Row m+16 holds the male coefficient among offers accepted by month m (m = -9..9).
	foreach outcome in first_total_nt_res2 log_first_total_nt_res2 {
		local target = subinstr("`outcome'", "first_total_nt", "cum_gap", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			regress `outcome' male if accepted==1 & accept_mo<=`cutoff'
			replace `target' = _b[male] in `row'
			replace `target'_se = _se[male] in `row'
		}
	}

	regress cum_gap_res2 mo
	return scalar slope_mo_res2 = _b[mo]

	regress log_cum_gap_res2 mo
	return scalar log_slope_mo_res2 = _b[mo]
end

capture program drop gap_res3
program define gap_res3, rclass
	// Cumulative gender gap in earnings residualized on control set 3.
	// Returns r(slope_mo_res3) (levels) and r(log_slope_mo_res3) (logs).
	capture drop cum_gap_res3 cum_gap_res3_se
	capture drop log_cum_gap_res3 log_cum_gap_res3_se
	foreach slot in cum_gap_res3 cum_gap_res3_se log_cum_gap_res3 log_cum_gap_res3_se {
		generate `slot' = .
	}
	capture drop mo
	generate mo = _n - 16 if _n <= 32

	// Row m+16 holds the male coefficient among offers accepted by month m (m = -9..9).
	foreach outcome in first_total_nt_res3 log_first_total_nt_res3 {
		local target = subinstr("`outcome'", "first_total_nt", "cum_gap", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			regress `outcome' male if accepted==1 & accept_mo<=`cutoff'
			replace `target' = _b[male] in `row'
			replace `target'_se = _se[male] in `row'
		}
	}

	regress cum_gap_res3 mo
	return scalar slope_mo_res3 = _b[mo]

	regress log_cum_gap_res3 mo
	return scalar log_slope_mo_res3 = _b[mo]
end

* levels *
capture program drop fig2_levels
program define fig2_levels, rclass
	// Cumulative mean earnings by acceptance month, separately for female==0 and female==1.
	// Row m+16 holds the mean (and its s.e.) among offers accepted by month m, m = -9..9.
	// Returns r(slope_g0), r(slope_g1), r(log_slope_g0), r(log_slope_g1): slopes of those means on month.
	foreach old in cum_mean0 cum_mean1 cum_mean0_se cum_mean1_se {
		capture drop `old'
	}
	capture drop log_cum_mean0 log_cum_mean1 log_cum_mean0_se log_cum_mean1_se
	foreach stem in cum_mean log_cum_mean {
		foreach suffix in 0 1 0_se 1_se {
			generate `stem'`suffix' = .
		}
	}

	capture drop mo
	generate mo = _n - 16 if _n <= 32

	// Constant-only regressions give the group mean and its standard error.
	foreach outcome in first_total_nt log_first_total_nt {
		local stem = subinstr("`outcome'", "first_total_nt", "cum_mean", 1)
		forvalues cutoff = -9/9 {
			local row = `cutoff' + 16
			forvalues f = 0/1 {
				regress `outcome' if accepted==1 & accept_mo<=`cutoff' & female==`f'
				replace `stem'`f' = _b[_cons] in `row'
				replace `stem'`f'_se = _se[_cons] in `row'
			}
		}
	}

	forvalues f = 0/1 {
		regress cum_mean`f' mo
		return scalar slope_g`f' = _b[mo]
	}

	forvalues f = 0/1 {
		regress log_cum_mean`f' mo
		return scalar log_slope_g`f' = _b[mo]
	}

end

** POINT ESTIMATES AND BOOTSTRAP FOR THE SLOPES **

// point estimates: run each program once on the full sample, logged to fact2_gap

set logtype text 
capture log close
log using fact2_gap, replace

set more off
foreach prog in fig2_levels gap_raw gap_res1 gap_res2 gap_res3 {
	`prog'
}

** bootstrap **

parallel setclusters 8

set more off
capture log close
set logtype text
log using "${figures}table4_tableE1A", replace

* NOTE(review): $tempfolder is not defined in the visible part of this file; confirm it is set
* before this point, otherwise the working directory changes unexpectedly.
cd "${tempfolder}"

* 1,000 bootstrap replications of each slope: levels first (raw, res1-res3), then logs.
foreach stat in slope_mo log_slope_mo {
	parallel bs, exp(slope=r(`stat')) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_raw
	forvalues k = 1/3 {
		parallel bs, exp(slope=r(`stat'_res`k')) reps(1000) nodrop seed(3 3 3 3 3 3 3 3): gap_res`k'
	}
}

log close

*****************************
* Table 5: Learning Process
*****************************

*********** Full Sample ***********	

	use postg_analysis.dta, clear
	drop if !missing(actual_totalpay_1)
	generate base_A = expected_totalpay_1
	generate mid_A = expected_totalpay_2
	generate realized_A = actual_totalpay if jobsearch_3!=2				//removing self-employed in aug2021
	
	* Mean, median, s.d. and N by gender, kept as formatted locals <var>_<stat>_<gender> for the table.
	foreach v of varlist base_A mid_A realized_A {
		forvalues sex = 1/2 {
			summarize `v' if gender==`sex', detail
			local `v'_mean_`sex' : display %9.0fc `r(mean)' 
			local `v'_med_`sex' : display %9.0fc `r(p50)'
			local `v'_sd_`sex' : display %9.0fc `r(sd)'
			local `v'_N_`sex' : display %3.0f `r(N)'
		}
	}

* Unpaired t-tests between each pair of measures, gender 2 first and then gender 1.
* p-values are kept in locals p_<first>_<second>_A_<gender>.
foreach sex in 2 1 {
	foreach pair in "base realized" "mid realized" "base mid" {
		local lhs : word 1 of `pair'
		local rhs : word 2 of `pair'
		ttest `lhs'_A = `rhs'_A if gender==`sex', unpaired
		local p_`lhs'_`rhs'_A_`sex' : display %4.3fc `r(p)'
	}
}

*********** Consistent Sample ***********	

	* Respondents flagged in both base_dummy and mid_dummy with all three earnings measures observed.
	keep if base_dummy==1 & mid_dummy==1 & !missing(expected_totalpay_1, expected_totalpay_2, actual_totalpay)

	generate base = expected_totalpay_1
	generate mid = expected_totalpay_2
	generate revision = (mid - base)/base *100
	generate realized = actual_totalpay
	
	* Drop revisions of 100 percent or more (a missing revision also compares as >= 100 and is dropped).
	drop if revision>=100
	
	foreach v of varlist base mid realized {
		forvalues sex = 1/2 {
			summarize `v' if gender==`sex', detail
			local `v'_mean_`sex' : display %9.0fc `r(mean)' 
			local `v'_med_`sex' : display %9.0fc `r(p50)'
			local `v'_sd_`sex' : display %9.0fc `r(sd)'
			local `v'_N_`sex' : display %3.0f `r(N)'
		}
	}
	
* Paired t-tests of expectations against realizations (gender 2, then gender 1).
foreach sex in 2 1 {
	foreach wave in base mid {
		ttest `wave' = realized if gender==`sex'
		local p_`wave'_realized_`sex' : display %4.3fc `r(p)'
	}
}

* Wilcoxon signed-rank tests for the same comparisons; these fill the "Median" rows.
foreach sex in 2 1 {
	foreach wave in base mid {
		signrank `wave' = realized if gender==`sex', exact
		local p_med_`wave'_realized_`sex' : display %4.3fc `r(p)'
	}
}

	// Write Table 5 to ${figures}table5.tex using the summary-statistic and p-value locals computed above.
	// In the rows below, locals suffixed _2 appear under "Men" and those suffixed _1 under "Women".
	cap file close table
	file open table using "${figures}table5.tex", write replace
	local fwt "file write table"
	// NOTE(review): the tabular spec declares 8 columns while the header rows use 7 - confirm intended.
	`fwt' "\begin{table}[h]\caption{Learning Process} \bigskip \centering \begin{threeparttable} \begin{tabular}{llcccccc} \toprule" _n
	`fwt' " & & Baseline  & Mid-Search   & Realizations & \multicolumn{2}{c}{p-value} \\"  _n
	`fwt' " & & Expectations & Expectations  & & Base = Real & Mid = Real \\ \hline" _n
	// Panel A: full-sample statistics; no p-values are printed in this panel.
	`fwt' " \multicolumn{2}{c}{A. Full Sample} & & & \\ \cline{1-2}" _n
	`fwt' " & & & & \\"_n	
	// NOTE(review): the \multirow spans (3 for Men, 2 for Women) do not match the 4 rows in each group.
	`fwt' " \multirow{3}{*}{Men} & Mean & `base_A_mean_2' &  `mid_A_mean_2' & `realized_A_mean_2' \\"_n
	`fwt' " & Median & `base_A_med_2' &  `mid_A_med_2' & `realized_A_med_2' \\"_n
	`fwt' " & Std. Dev. & `base_A_sd_2' &  `mid_A_sd_2' & `realized_A_sd_2' \\"_n
	`fwt' " & N & `base_A_N_2' &  `mid_A_N_2' & `realized_A_N_2' \\"_n
	`fwt' " & & & \\"_n
	`fwt' " \multirow{2}{*}{Women} & Mean & `base_A_mean_1' &  `mid_A_mean_1' &  `realized_A_mean_1' \\"_n
	`fwt' " & Median & `base_A_med_1' &  `mid_A_med_1'  &  `realized_A_med_1' \\"_n
	`fwt' " & Std. Dev. & `base_A_sd_1' &  `mid_A_sd_1'  &  `realized_A_sd_1' \\"_n
	`fwt' " & N & `base_A_N_1' &  `mid_A_N_1' & `realized_A_N_1' \\ \hline"_n
	// Panel B: consistent sample; t-test p-values on the Mean rows, signed-rank p-values on the Median rows.
	`fwt' " \multicolumn{2}{c}{B. Consistent Sample} & & & \\ \cline{1-2}" _n
	`fwt' " \multirow{3}{*}{Men} & Mean & `base_mean_2' &  `mid_mean_2' & `realized_mean_2' & `p_base_realized_2' & `p_mid_realized_2' \\"_n
	`fwt' " & Median & `base_med_2' &  `mid_med_2' & `realized_med_2' & `p_med_base_realized_2' & `p_med_mid_realized_2' \\"_n
	`fwt' " & Std. Dev. & `base_sd_2' &  `mid_sd_2' & `realized_sd_2' \\"_n
	`fwt' " & N & `base_N_2' &  `mid_N_2' & `realized_N_2' \\"_n
	`fwt' " & & & \\"_n
	`fwt' " \multirow{2}{*}{Women} & Mean & `base_mean_1' &  `mid_mean_1' &  `realized_mean_1' & `p_base_realized_1' & `p_mid_realized_1' \\"_n
	`fwt' " & Median & `base_med_1' &  `mid_med_1'  &  `realized_med_1' & `p_med_base_realized_1' & `p_med_mid_realized_1' \\"_n
	`fwt' " & Std. Dev. & `base_sd_1' &  `mid_sd_1'  &  `realized_sd_1' \\"_n
	`fwt' " & N & `base_N_1' &  `mid_N_1' & `realized_N_1' \\"_n
	`fwt' "\bottomrule" _n
	`fwt' "\end{tabular}"_n
	`fwt' "\begin{tablenotes} \item[] \footnotesize"_n
	`fwt' "Note: Both samples include individuals from the 2018 and 2019 graduating cohorts. Baseline only includes those without jobs at the baseline survey. Final realizations only include those who had a job by the post-graduation survey. The full sample include all individuals who responded to the survey indicated. The consistent sample includes only individuals who answered the baseline, mid-search, and post-graduation surveys, had not accepted a job by the mid-search survey, and revised their expectations by less than 100 percent." _n
	`fwt' "\end{tablenotes} \end{threeparttable} \end{table}" _n
	file close table

***********************************************
* Table 6: Gender Gap in Reservation Earnings
***********************************************

* Load the reservation-wage working file. -use- accepts -clear-, not -replace-; the original
* ", replace" (apparently left over from a -save-) stops the do-file with an option error.
use temp_res_wages, clear

* US-born indicator: the _1 answer, filled in from the _3 country of birth where the former is missing.
gen us_born=us_born_1
replace us_born= birthco_3=="United States" if !missing(birthco_3) & missing(us_born)

gen gpa = gpa_1

* Concentration Dummies
* One indicator per concentration, equal to 1 when it is listed as first, second or third major.
* The two lists below are parallel: the j-th variable suffix goes with the j-th major name.
local conc_stubs "acc ent fin gen_mgt int_mgt law mis mkg otm ob"
local conc_names `""Accounting" "Entrepreneurship" "Finance" "General Management" "International Management" "Law" "Management Information Systems" "Marketing" "Operations and Technology Management" "Organizational Behavior""'

local n_conc : word count `conc_stubs'
forvalues j = 1/`n_conc' {
	local stub : word `j' of `conc_stubs'
	local major : word `j' of `conc_names'
	gen conc_`stub' = (major_1=="`major'"|second_major_1=="`major'"|third_major_1=="`major'")
	label var conc_`stub' "Concentration in `major', could be first, second, or third major_1"
}

* parental education

* Raw answer for each parent: the _1 response, falling back on the _3 response when it is missing.
foreach parent in fa mo {
	gen `parent'_edu = `parent'_edu_1
	replace `parent'_edu = `parent'_edu_3 if missing(`parent'_edu)
}

* Father: 9 = missing or unclassified answer; codes 1-8 assigned from the exact answer strings
* (trailing blanks inside some strings are part of the stored answers).
gen fa_educ = 9
replace fa_educ = 1 if inlist(fa_edu, "Less than HS", "Did not finish high school", "Did not finish high school ", "MIDDLE SCHOOL", "middle school") ///
	| inlist(fa_edu, "My Father Has No Degrees", "no degree", "equivalent of middle school in ecuador ", "He only completed up to middle school ")
replace fa_educ = 2 if fa_edu == "High School Degree"
replace fa_educ = 3 if inlist(fa_edu, "Some College/Associate Degree", "Trade School", "CPA")
replace fa_educ = 4 if fa_edu == "Bachelor (B.A., B.S)"
replace fa_educ = 5 if inlist(fa_edu, "Medicine (M.D.)", "DDS", "B.S. and Pharm. D ", "DMD")
replace fa_educ = 6 if inlist(fa_edu, "Masters (M.A., M.S., M.F.A.)", "MBA", "JD and an MBA")
replace fa_educ = 7 if fa_edu == "Law (J.D.)"
replace fa_educ = 8 if fa_edu == "Doctorate (Ph.D)"

* Mother: same coding scheme with her own list of free-text answers.
gen mo_educ = 9
replace mo_educ = 1 if inlist(mo_edu, "Less than HS", "Did not finish high school", "Did not finish high school ", "MIDDLE SCHOOL", "middle school") ///
	| inlist(mo_edu, "My mother Has No Degrees", "no degree", "equivalent of middle school in ecuador ", "He only completed up to middle school ", "Didn't complete high school", "GED")
replace mo_educ = 2 if mo_edu == "High School Degree"
replace mo_educ = 3 if inlist(mo_edu, "Some College/Associate Degree", "Trade School", "CPA")
replace mo_educ = 4 if mo_edu == "Bachelor (B.A., B.S)"
replace mo_educ = 5 if inlist(mo_edu, "Medicine (M.D.)", "Doctor of Veterinary Medicine", "DDS", "B.S. and Pharm. D ")
replace mo_educ = 6 if inlist(mo_edu, "Masters (M.A., M.S., M.F.A.)", "MBA", "JD and an MBA")
replace mo_educ = 7 if mo_edu == "Law (J.D.)"
replace mo_educ = 8 if mo_edu == "Doctorate (Ph.D)"

/*
	* gen controls
	foreach var of varlist fa_educ mo_educ {
		gen _`var'_miss=missing(`var')
		replace _`var'=0 if missing(`var')
	}
*/

* race
	// Substring searched for in the race answers, one local per indicator. The loop below looks up
	// `<name>_str', so every local must carry the _str suffix. The original defined "asia_pacif"
	// without it, so the search string was empty and race_asia_pacif never picked up "Asian".
	// NOTE(review): this fix changes race_asia_pacif and therefore the Table 6 control set.
	// NOTE(review): "American" matches any answer containing that word - confirm against the
	// survey's race categories.
	local white_str "White"
	local black_str "Black"
	local am_india_str "American"
	local latino_str "Hispanic"
	local asia_pacif_str "Asian"
	foreach race in "white" "black" "am_india" "latino" "asia_pacif"{
		// Use race_1 when available, otherwise race_3; code 0 when neither is answered.
		gen race_`race' = strpos(race_1, "``race'_str'")!=0 if !missing(race_1)
		replace race_`race' = strpos(race_3, "``race'_str'")!=0 if !missing(race_3) & missing(race_1)
		replace race_`race'=0 if missing(race_`race')
	}
	
	// Flag respondents with no race answer in either variable.
	gen miss_race=missing(race_1) & missing(race_3)

rename female_1 female

* Overconfidence enters as a 999-filled copy plus a missing-value indicator.
gen ocW_miss = ocW==.
capture drop ocW_wmiss
gen ocW_wmiss = cond(ocW==., 999, ocW)

/*** WRITE TABLE IN LATEX ***/

	label variable female "Female"
	label variable risk2 "Risk Tolerance"
	label variable ocW_wmiss "Overconfidence (\%)"
	label variable ocW_miss "Indicator, overconfidence missing"
	label variable reservation_wage_1W  "Ex-Ante Reservation Earnings"
		
	local controls0 ""
	local controls1 "i.cohort_1 conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"

	global controls1 "i.cohort_1 conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
	
	// Eight specifications: {no controls, basic controls} x {none, risk, overconfidence, both},
	// all estimated on the same sample.
	local smpl "if weird~=1 & reservation_wage_1>=20000 & risk2~=."
	local extra1 ""
	local extra2 "risk2"
	local extra3 "ocW_wmiss ocW_miss"
	local extra4 "risk2 ocW_wmiss ocW_miss"

	est clear
	eststo clear
	forvalues c = 0/1 {
		forvalues e = 1/4 {
			eststo: reg reservation_wage_1W female `controls`c'' `extra`e'' `smpl', robust
			estadd ysumm
		}
	}
	
	// Export the stored models to ${figures}table6.tex. Only female, risk2 and the two overconfidence
	// terms are shown; a "Controls" row is marked X for models that include gpa.
	# delimit ;
	esttab * using "${figures}table6.tex", l stats(ymean r2 N, fmt(0 3 0) labels("Mean" "\(R^{2}\)" "N"))  title("Gender Gap in Reservation Earnings") 
	nomti se b(%4.0f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
	booktabs collabels(none) gaps
	nobase addn( "Note: The dependent variable is ex-ante reservation earnings in 2017 dollars. Basic controls include cohort fixed effects, major fixed effects, 
	GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
	Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
	keep(female risk2 ocW_wmiss ocW_miss) 
	indicate("Controls = gpa", labels("X" "")) replace;	

******************** testing for statistical difference in coefficient after including risk aversion and over-confidence - manually change the .tex file to report them;
 
 #delimit cr

 eststo clear

	* Re-estimate the four comparison models without -robust- and store them as m1-m4;
	* robust variances are requested in -suest- below.
	local smpl "if weird~=1 & reservation_wage_1>=20000 & risk2~=."
	local rhs1 "female"
	local rhs2 "female risk2 ocW_wmiss ocW_miss"
	local rhs3 "female $controls1"
	local rhs4 "female $controls1 risk2 ocW_wmiss ocW_miss"
	forvalues j = 1/4 {
		eststo: reg reservation_wage_1W `rhs`j'' `smpl'
		estadd ysumm
		estimates store m`j'
	}

* Without controls: does the female coefficient change once risk and overconfidence are added?
suest m1 m2, vce(robust)
test [m1_mean]female = [m2_mean]female

/*
Recorded output:

 ( 1)  [m1_mean]female - [m2_mean]female = 0

           chi2(  1) =    7.42
         Prob > chi2 =    0.0065
*/

* Same comparison with the basic controls included.
suest m3 m4, vce(robust)
test [m3_mean]female = [m4_mean]female

/*
Recorded output:

 ( 1)  [m3_mean]female - [m4_mean]female = 0

           chi2(  1) =    6.04
         Prob > chi2 =    0.0140
*/

***************************************************
* Table 7: Gender Gap in Timing of Starting Search
***************************************************

* Load the analysis sample and keep accepted offers.
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if accepted==1

* Overconfidence proxy: percent gap between expected and accepted total pay.
gen oc=(expected_totalpay-first_total_nt)/first_total_nt*100 if !missing(expected_totalpay) & !missing(first_total_nt)

winsor oc, gen(ocW) p(0.025)		//winsorizing top and bottom 2.5% 

	replace first_industry=13 if accepted==1 & missing(first_industry)

	// Parental-leave indicator, defined only when BOTH benefit items are observed.
	// (The original condition tested first_benefit_maternity twice and never checked paternity.)
	gen first_benefit_matorpat= (first_benefit_maternity==1 | first_benefit_paternity==1) if !missing(first_benefit_maternity) & !missing(first_benefit_paternity)
	foreach var of varlist exp_earn_growth_1yr{
		gen `var'_miss=missing(`var')
		replace `var'=0 if missing(`var')
	}
	
	// Variable labels. Where a variable is labelled twice below, the later label is the one that sticks.
	label var female "Female"
	label var risk2 "Risk Tolerance (1-6)"
	label var over_conf "Overconfident (0/1)"
	label var oc1 "(exp-real)/real*100"
	label var exp_earn_growth_1yr "12mo Exp. Earn Grow"
	
	label var first_benefit_maternity "Maternity Leave"
	label var first_benefit_paternity "Paternity Leave" 
	label var first_benefit_flexwork "Flexible Hours"
	label var first_benefit_sickleave "Sick Leave"
	label var first_benefit_childcare "Childcare"
	label var offer_weekly_hrs "Weekly Hours"
	label var first_benefit_matorpat "Parental Leave"
	
	gen debt_1000=student_debt/1000 if !missing(student_debt)
	
	label var female "Female"
	label var risk2 "Risk Tolerance"
	label var startsearch_mo "Mo. Start Search"
	label var oc1 "Exp-Real (pp)"
	label var confidence_measure "Residual Overconf."
	label var procrastindex_3 "Procrast"
	label var patience_3 "Patience"
	label var high_risk2 "Risk Tol. $\geq$ 5"
	label var confidence_measure "Overconfidence (Resid)"
	label var first_hours "Hours per Week"
	label var debt_1000 "Debt (1,000)"
	
	// Text labels for risk tolerance; the 1(2)6 step means only values 1, 3 and 5 get a label.
	gen risk_tol_lab=""
	replace risk2=floor(risk2)
	forval i=1(2)6{
		replace risk_tol_lab="Risk Tol. = `i'" if risk2==`i'
	}
	
	// Missing weekly hours are set to 999 and flagged. The original follow-up
	// "replace offer_weekly_hrs_miss=. if offer_weekly_hrs==." could never match after the
	// imputation above, so it has been removed.
	replace offer_weekly_hrs = 999 if offer_weekly_hrs==.
	gen offer_weekly_hrs_miss = offer_weekly_hrs==999
	
	set more off

	sum startsearch_mo

* Overconfidence enters as a 999-filled copy plus a missing-value indicator.
gen ocW_miss = ocW==.
capture drop ocW_wmiss
gen ocW_wmiss = ocW
replace ocW_wmiss = 999 if ocW==.

	label var ocW_wmiss "Overconfidence (\%)"

* Outcome: search started before graduation (startsearch_mo below 0); missing when the start month is missing.
gen startsearch_beforegrad = startsearch_mo<0
replace startsearch_beforegrad=. if startsearch_mo==.
	// Control sets: none, basic, basic + industry, basic + industry + search start month.
	global controls0 ""
	global controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
	global controls2 "i.first_industry $controls1" 
	global controls3 "$controls2 startsearch_mo" 

	// Eight linear probability models: {no controls, basic controls} x {none, risk, overconfidence, both}.
	local add1 ""
	local add2 "risk2"
	local add3 "ocW_wmiss ocW_miss"
	local add4 "risk2 ocW_wmiss ocW_miss"

	eststo clear
	forvalues c = 0/1 {
		forvalues a = 1/4 {
			eststo: regress startsearch_beforegrad female `add`a'' ${controls`c'}, robust
			estadd ysumm
		}
	}

	// Export the stored models to ${figures}table7.tex with a custom table header (prehead).
	// Only female, risk2 and ocW_wmiss are shown; "Controls" is marked X for models that include gpa.
	# delimit ;
	esttab * using "${figures}table7.tex", l stats(ymean r2 N, fmt(3 3 0) labels("Mean" "\(R^{2}\)" "N"))  
	nomti se b(%4.3f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
	booktabs collabels(none) gaps nonotes title("")
	substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{15cm}})
	nobase addn( "Note: The dependent variable is a dummy variable for starting search before graduation. Basic controls include cohort fixed effects, major fixed effects, 
	GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
	keep(female risk2 ocW_wmiss) 
	indicate("Controls = gpa", labels("X" "")) 
	prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
	`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
	`"\caption{Gender Gap in Timing of Starting Search}"'
	`"\bigskip"'
	`"\begin{tabular}{l*{@M}{c}}"'
	`"\toprule"' )
	replace;
	#delimit cr
	// The delimiter must be reset here. Without it the file stays in semicolon mode, and the
	// section header, data load and regressions that follow are read as one long "*" comment
	// up to the next semicolon, so they never run.

************************************************************************************************************
* Table 8: Gender Gap in Accepted Earnings, Controlling for Risk Preferences and Proxies for Biased Beliefs
************************************************************************************************************

* Load the analysis sample and keep accepted offers.
use BU_grad_analysis_sample_aug2021.dta, clear 
keep if accepted==1

* Overconfidence proxy: percent gap between expected and accepted total pay.
gen oc=(expected_totalpay-first_total_nt)/first_total_nt*100 if !missing(expected_totalpay) & !missing(first_total_nt)

winsor oc, gen(ocW) p(0.025)		//winsorizing top and bottom 2.5% 

replace first_industry=13 if accepted==1 & missing(first_industry)

* Missing weekly hours are set to 999 and flagged. The original follow-up
* "replace offer_weekly_hrs_miss=. if offer_weekly_hrs==." could never match after the
* imputation, so it has been removed.
replace offer_weekly_hrs = 999 if offer_weekly_hrs==.
gen offer_weekly_hrs_miss = offer_weekly_hrs==999

* Control sets (controls1 was previously defined twice with identical content; one definition is kept).
macro define controls0 ""
macro define controls1 "i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ"
macro define controls2 "i.first_industry i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ" 
macro define controls3 "i.first_industry i.cohort conc_* i.us_born race_white race_black race_am_india race_latino race_asia_pacif miss_race gpa i.fa_educ i.mo_educ startsearch_mo" 

label var expected_totalpay "Expected Total Compensation"
label var trait_confidence "Perceived Relative Ability (1-5)"
label var female "Female"
label var risk2 "Risk Tolerance"

est clear
eststo clear

* Eight specifications on the sample with non-missing expected pay:
* {basic controls, basic + job controls} x {none, risk, expected pay, both}.
local job_ctrls "i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss"
local proxy1 ""
local proxy2 "risk2"
local proxy3 "expected_totalpay"
local proxy4 "risk2 expected_totalpay"

forvalues p = 1/4 {
	eststo: regress first_total_nt female `proxy`p'' $controls1 if expected_totalpay~=., robust
	estadd ysumm
}

* -xi:- expands the i. terms into _I* dummies, which the indicate() option of the export refers to.
forvalues p = 1/4 {
	eststo: xi: regress first_total_nt female `proxy`p'' $controls1 `job_ctrls' if expected_totalpay~=., robust
	estadd ysumm
}

* Export the stored models to ${figures}table8b.tex. Only female, risk2 and expected_totalpay are shown;
* "Controls" is marked for models with gpa and "Add. controls" for models containing _Ifirst_loc_2.
* NOTE(review): coefficients are printed with %4.1f here - confirm this precision is intended.
# delimit ;
esttab * using "${figures}table8b.tex", l stats(ymean r2 N, fmt(0 3 0) labels("Mean" "\(R^{2}\)" "N"))  
nomti se b(%4.1f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
booktabs collabels(none) gaps nonotes title("")
substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{16cm}})
nobase addn( "Note: The dependent variable is total accepted earnings in the first year in 2017 dollars. Basic controls include cohort fixed effects, major fixed effects, 
GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
Additional controls include fixed effects for industry (19 groups), dummies for the location of the first job (country/state), and weekly hours of work. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
keep(female risk2 expected_totalpay) 
indicate("Controls = gpa" "Add. controls = _Ifirst_loc_2", labels("X" ""))
prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
`"\caption{Gender Gap in Accepted Earnings}"'
`"\bigskip"'
`"\begin{tabular}{l*{@M}{c}}"'
`"\toprule"' )
replace;

***************** CHECK IF COEFFICIENTS ARE STATISTICALLY DIFFERENT *************;
		
#delimit cr
	
* Re-estimate the four comparison models without -robust- and store them as a1-a4;
* robust variances are requested in -suest- below. a3 and a4 add the job controls through -xi:-.
local job_ctrls "i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss"
local spec1 "female $controls1"
local spec2 "female risk2 expected_totalpay $controls1"
local spec3 "female $controls1 `job_ctrls'"
local spec4 "female risk2 expected_totalpay $controls1 `job_ctrls'"
forvalues j = 1/4 {
	local pre = cond(`j' > 2, "xi:", "")
	eststo: `pre' reg first_total_nt `spec`j'' if expected_totalpay~=.
	estimates store a`j'
}

* Basic controls: does the female coefficient change once risk and expected pay are added?
suest a1 a2, vce(robust)
test [a1_mean]female = [a2_mean]female	

/*
Recorded output:

 ( 1)  [a1_mean]female - [a2_mean]female = 0

           chi2(  1) =    6.25
         Prob > chi2 =    0.0124
*/

* Same comparison with the job controls included.
* NOTE(review): the second -test- jointly tests both coefficients against zero; its result is not
* among the recorded outputs - confirm whether it is still needed.
suest a3 a4, vce(robust)
test [a3_mean]female = [a4_mean]female	
test [a3_mean]female = [a4_mean]female	= 0

/*
Recorded output:

 ( 1)  [a3_mean]female - [a4_mean]female = 0

           chi2(  1) =    4.08
         Prob > chi2 =    0.0433
*/

/*2nd Proxy*/

* Same layout as the first proxy, with trait_confidence in place of expected pay and no sample restriction.
local job_ctrls "i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss"
local proxy1 ""
local proxy2 "risk2"
local proxy3 "trait_confidence"
local proxy4 "trait_confidence risk2"

eststo clear
forvalues p = 1/4 {
	eststo: regress first_total_nt female `proxy`p'' $controls1, robust
	estadd ysumm
}

* The extended specification with both proxies lists risk2 first.
local proxy4 "risk2 trait_confidence"
forvalues p = 1/4 {
	eststo: xi: regress first_total_nt female `proxy`p'' $controls1 `job_ctrls', robust
	estadd ysumm
}

* Export the stored models to ${figures}table8a.tex. Only female, risk2 and trait_confidence are shown;
* "Controls" is marked for models with gpa and "Add. controls" for models containing _Ifirst_loc_2.
# delimit ;
esttab * using "${figures}table8a.tex", l stats(ymean r2 N, fmt(0 3 0) labels("Mean" "\(R^{2}\)" "N"))  
nomti se b(%4.0f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
booktabs collabels(none) gaps nonotes title("")
substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{16cm}})
nobase addn( "Note: The dependent variable is total accepted earnings in the first year in 2017 dollars. Basic controls include cohort fixed effects, major fixed effects, 
GPA, dummy for US-born, and fixed effects for race, father's education, and mother's education. 
Additional controls include fixed effects for industry (19 groups), dummies for the location of the first job (country/state), and weekly hours of work. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level.") 
keep(female risk2 trait_confidence) 
indicate("Controls = gpa" "Add. controls = _Ifirst_loc_2", labels("X" ""))
prehead(`"\begin{table}[htbp]\centering"' `"\footnotesize"'
`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
`"\caption{Gender Gap in Accepted Earnings}"'
`"\bigskip"'
`"\begin{tabular}{l*{@M}{c}}"'
`"\toprule"' )
replace;

#delimit cr
	
* Tests of whether the female coefficient changes once risk2 and
* trait_confidence are added, for the Table 8a specifications.
* The four models are re-fit without the robust option and saved as b1-b4;
* the robust variance is requested in suest itself.
* NOTE(review): presumably the plain re-fit is needed because suest does not
* accept results that were estimated with a robust VCE -- confirm against the
* suest documentation.
local job_ctrls i.first_industry i.first_location offer_weekly_hrs offer_weekly_hrs_miss

* b1 / b2: basic controls, without and with the two traits
eststo: regress first_total_nt female $controls1
estimates store b1

eststo: regress first_total_nt female trait_confidence risk2 $controls1
estimates store b2

* b3 / b4: the same pair with the additional job controls
eststo: xi: regress first_total_nt female $controls1 `job_ctrls'
estimates store b3

eststo: xi: regress first_total_nt female risk2 trait_confidence $controls1 `job_ctrls'
estimates store b4

* Basic-controls pair
suest b1 b2, vce(robust)
test [b1_mean]female = [b2_mean]female

* Recorded output:
*   ( 1)  [b1_mean]female - [b2_mean]female = 0
*
*             chi2(  1) =   19.97
*           Prob > chi2 =    0.0000

* Additional-controls pair
suest b3 b4, vce(robust)
test [b3_mean]female = [b4_mean]female

* Recorded output:
*   ( 1)  [b3_mean]female - [b4_mean]female = 0
*
*             chi2(  1) =   10.41
*           Prob > chi2 =    0.0013

***********************************************************
* Table 9: Gender Gap in Reservation Wage in Round 1 (Lab)
***********************************************************

* Load the lab-experiment data and label the two time-preference measures,
* which appear as rows in the exported table.
use temp_expt, clear

label variable time_ce1_std "Time preference (today vs. 4 weeks)"
label variable time_ce2_std "Time preference (4 weeks vs. 8 weeks)"

* Global holding the control set that is appended in columns 5-8.
macro define base_controls "gpa_wmiss us_born_wmiss miss_gpa miss_us_born i.asuyear fa_educ_high_wmiss mo_educ_high_wmiss miss_fa_educ_high miss_mo_educ_high race_white_wmiss race_asian_wmiss miss_race_white miss_race_asian compeng_wmiss busecon_wmiss miss_compeng miss_busecon time_ce1_std time_ce2_std"

	* Start from an empty set of stored estimates.
	estimates clear
	eststo clear

	* Trait regressors added on top of female and fast, one set per column:
	* none, risk only, expected-prior only, both.
	local traits1
	local traits2 risk_r_std
	local traits3 expprior_std
	local traits4 risk_r_std expprior_std

	* Columns 1-4: no additional controls (1.glitch is the glitch==1 indicator).
	forvalues s = 1/4 {
		eststo: regress minwage_r1 female fast `traits`s'' 1.glitch, vce(robust)
		estadd ysumm
	}

	* Columns 5-8: same four specifications plus the base control set.
	forvalues s = 1/4 {
		eststo: regress minwage_r1 female fast `traits`s'' 1.glitch $base_controls, vce(robust)
		estadd ysumm
	}
	
	* Export the stored models to table9.tex in the figures directory.
	*  - Printed rows: female, risk_r_std, expprior_std, fast and the two
	*    time-preference measures; a "Controls" indicator row marks the
	*    columns that contain gpa_wmiss.
	*  - Coefficients and footer statistics (dependent-variable mean,
	*    R-squared) use two decimals; N is shown as an integer.
	*  - prehead() supplies the caption, label and a spanning header row.
	*  - The command is written under a semicolon delimiter and the
	*    delimiter is reset to cr afterwards.
	* NOTE(review): the spanning header row has two empty cells followed by
	* a 7-column multicolumn, while the rule beneath it covers columns 2-9.
	* Eight models are stored above, so the heading is centred over columns
	* 3-9 only -- confirm whether one empty cell and an 8-column span were
	* intended.
	# delimit ;
	esttab * using "${figures}table9.tex", l stats(ymean r2 N, fmt(2 2 0) labels("Mean" "\(R^{2}\)" "N"))
	nomti se b(%4.2f) starlevels(* 0.10 ** 0.05 *** 0.01) style(tex) 
	booktabs collabels(none) gaps nonotes title("")
	substitute([htbp] [!htbp] \begin{tabular} \small\begin{tabular} {l} {p{19cm}})
	nobase addn("Note: The dependent variable is reservation wages in round 1. Controls include dummies for year of study, GPA, dummy for US-born, race dummies, dummy variables for college-graduate father/mother, separate indicator variables for majoring in engineering/computing and business/economics, and choices in the time preferences elicitation task. Robust standard errors in parentheses. ***significant at the 1\% level, **5\% level, *10\% level") 
	keep(female risk_r_std expprior_std fast time_ce1_std time_ce2_std) 
	indicate("Controls = gpa_wmiss", labels("X" ""))
	prehead(`"\begin{table}[htbp]\centering"' 
	`"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
	`"\caption{Gender Gap in Reservation Wage in Round 1 (Lab)} \label{table:expt_gendergap}"'
	`"\bigskip"'
	`"\begin{tabular}{l*{@M}{c}}"'
	`"\toprule"' 
	`"& & \multicolumn{7}{c}{Dependent Variable: Reservation Wage in Round 1}  \\ \cline{2-9}"' )
	replace;	

#delimit cr
** p-value for the difference in coefficients across specifications **
* The Table 9 specifications without and with the two traits are re-fit
* without robust standard errors and saved as e1-e4; suest then combines
* each pair with a robust variance and the female coefficients are compared.

	eststo clear

	* Right-hand sides: e1/e2 without the base controls, e3/e4 with them.
	local rhs1 female fast 1.glitch
	local rhs2 female risk_r_std expprior_std fast 1.glitch
	local rhs3 `rhs1' $base_controls
	local rhs4 `rhs2' $base_controls

	forvalues m = 1/4 {
		eststo: regress minwage_r1 `rhs`m''
		estadd ysumm
		estimates store e`m'
	}

	* Pair without the base controls
	suest e1 e2, vce(robust)
	test [e1_mean]female = [e2_mean]female

	* Recorded output:
	*   ( 1)  [e1_mean]female - [e2_mean]female = 0
	*
	*             chi2(  1) =   12.59
	*           Prob > chi2 =    0.0004

	* Pair with the base controls
	suest e3 e4, vce(robust)
	test [e3_mean]female = [e4_mean]female

	* Recorded output:
	*   ( 1)  [e3_mean]female - [e4_mean]female = 0
	*
	*             chi2(  1) =   13.60
	*           Prob > chi2 =    0.0002







